In [1185]:
import pandas
import numpy
from pyDecision.algorithm import bw_method

In [1186]:
cei_metric = pandas.read_parquet("Data/CEI Metric/")
cei_code = dict(zip(cei_metric["cei_code"].unique(),["EOL OS","Weekly","Weekly","Daily"]))
cei_metric["cei_code"] = cei_metric["cei_code"].apply(lambda x:cei_code[x])
cei_data = cei_metric.fillna("-").groupby(["host_type","accessibility","cei_code"])["cei_status"].value_counts(normalize=True)
cei_data = cei_data.to_frame()
cei_data.columns = ["failure_count"]
cei_data = cei_data.reset_index()
cei_data = cei_data[cei_data["cei_status"] == "Failed"]
cei_data["host_type"] = cei_data["host_type"].str.replace("Server","Servers")

In [1187]:
cei_failures = cei_data.set_index(["host_type","accessibility","cei_code"])["failure_count"].unstack().fillna(0).reset_index()

In [1188]:
cei_failures["Fortnightly"] = 0.0

In [1189]:
cei_failures["asset_class"] = cei_failures.fillna("-").apply(lambda row: row["host_type"] + "_" + row["accessibility"],axis=1).str.replace("_-","")

In [1190]:
cei_failures = cei_failures.drop(["host_type","accessibility"],axis=1).set_index("asset_class")

In [1191]:
risk = pandas.read_csv("Data/Risk Matrix.csv")

In [1192]:
risk["asset_class"] = risk.apply(lambda row: row["host_type"] + "_" + row["accessibility"],axis=1).str.replace("_-","")

In [1193]:
risk = risk.drop(["host_type","accessibility"],axis=1).set_index("asset_class")

In [1194]:
risk_effective = cei_failures*risk

In [1195]:
#risk.to_csv("risk_matrix.csv")

In [1196]:
risk_values = risk_effective.values

In [1197]:
risk

Unnamed: 0_level_0,Fortnightly,Weekly,Daily,EOL OS
asset_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Servers_External,16,12,8,20
Servers_Internal,12,9,6,15
Workstation,12,9,6,15


In [1198]:
mic = numpy.array([7,7,9,1])
lic = numpy.array([7,7,1,9])

In [1199]:
weights =  bw_method(risk_values, mic, lic, size = 15, iterations = 100)

Iteration =  0  f(x) =  inf
Iteration =  1  f(x) =  1.968133978645697
Iteration =  2  f(x) =  1.968133978645697
Iteration =  3  f(x) =  1.1876711748364919
Iteration =  4  f(x) =  0.7763010033719102
Iteration =  5  f(x) =  0.21384215561081849
Iteration =  6  f(x) =  0.1051244481635259
Iteration =  7  f(x) =  0.05854546886882356
Iteration =  8  f(x) =  0.05854546886882356
Iteration =  9  f(x) =  0.05854546886882356
Iteration =  10  f(x) =  0.05854546886882356
Iteration =  11  f(x) =  0.05854546886882356
Iteration =  12  f(x) =  0.05854546886882356
Iteration =  13  f(x) =  0.0447217602671246
Iteration =  14  f(x) =  0.0447217602671246
Iteration =  15  f(x) =  0.04460327232771244
Iteration =  16  f(x) =  0.04460327232771244
Iteration =  17  f(x) =  0.04460327232771244
Iteration =  18  f(x) =  0.04460327232771244
Iteration =  19  f(x) =  0.04460327232771244
Iteration =  20  f(x) =  0.04460327232771244
Iteration =  21  f(x) =  0.04460327232771244
Iteration =  22  f(x) =  0.04460327232771244


In [21]:
cei_severity = pandas.DataFrame([1,4,4,2,3],index=["CEI 1.0","CEI 1.1","CEI 1.2","CEI 1.3","CEI 1.4"],columns=["CEI Severity"])

In [23]:
cei_severity.T

Unnamed: 0,CEI 1.0,CEI 1.1,CEI 1.2,CEI 1.3,CEI 1.4
CEI Severity,1,4,4,2,3


In [50]:
object_severity = pandas.DataFrame([4,2,2],index=["External Server","Workstation","Internal Server"])

In [55]:
object_severity.index

Index(['External Server', 'Workstation', 'Internal Server'], dtype='object')

In [56]:
pandas.DataFrame(cei_severity.T.values * object_severity.values,columns=cei_severity.index,index=object_severity.index)

Unnamed: 0,CEI 1.0,CEI 1.1,CEI 1.2,CEI 1.3,CEI 1.4
External Server,4,16,16,8,12
Workstation,2,8,8,4,6
Internal Server,2,8,8,4,6


In [79]:
cei_severity["CEI Severity"].rank(method="dense").values

array([1., 4., 4., 2., 3.])

In [81]:
cei_severity["CEI Severity"].rank(method="dense",ascending=False).values

array([4., 1., 1., 3., 2.])

In [18]:
import numpy,scipy,pandas

In [4]:
from scipy.stats import rankdata

In [16]:
numpy.argsort(cei_severity) + 1

array([1, 4, 5, 2, 3])

In [1200]:
weights

array([0.1005108 , 0.10078584, 0.08152843, 0.71717493])

In [1201]:
weights_scaled = numpy.round(weights,decimals=1)

In [1202]:
sum(weights_scaled)

1.0

# TOPSIS

In [1203]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
risk_scaled = min_max_scaler.fit_transform(risk_values)

In [1204]:
import mcdm
x_benefit = [True,True,True,True]
priority_rank = mcdm.rank(risk_scaled, w_vector=weights_scaled, s_method="TOPSIS",is_benefit_x = x_benefit)

In [1205]:
priority_rank

[('a2', 0.8319256396465603),
 ('a3', 0.46699293399353087),
 ('a1', 0.12389934309929543)]

In [1206]:
weights

array([0.1005108 , 0.10078584, 0.08152843, 0.71717493])

In [1232]:
temp =[]
for i in range(0, len(priority_rank)):
    for j in range(0, len(weights)):
        temp.append(1/(-numpy.log10((priority_rank[i][1])*weights[j])))

In [1233]:
risk.index

Index(['Servers_External', 'Servers_Internal', 'Workstation'], dtype='object', name='asset_class')

In [1234]:
criteria_names = risk.columns
alternative_names = risk.index

In [1235]:
temp2=[]
for i in range(0, len(alternative_names)):
    for j in range(0, len(criteria_names)):
        temp2.append(alternative_names[i] + ":" + criteria_names[j])

In [1236]:
list_zip = zip(temp2, temp)
zipped_list = list(list_zip)

In [1237]:
sorted_priority = sorted(zipped_list, key=lambda x: x[1], reverse=True )

In [1238]:
sorted_priority = dict(sorted_priority)

In [1239]:
risk

Unnamed: 0_level_0,Fortnightly,Weekly,Daily,EOL OS
asset_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Servers_External,16,12,8,20
Servers_Internal,12,9,6,15
Workstation,12,9,6,15


In [1240]:
priority_df = pandas.DataFrame.from_dict(sorted_priority.items())

In [1241]:
priority_df.columns = ["asset_type_cei","priority_score"]

In [1242]:
priority_df

Unnamed: 0,asset_type_cei,priority_score
0,Servers_External:EOL OS,4.458506
1,Servers_Internal:EOL OS,2.104977
2,Workstation:EOL OS,0.951198
3,Servers_External:Weekly,0.928923
4,Servers_External:Fortnightly,0.9279
5,Servers_External:Daily,0.85572
6,Servers_Internal:Weekly,0.753415
7,Servers_Internal:Fortnightly,0.752742
8,Servers_Internal:Daily,0.704533
9,Workstation:Weekly,0.525339


In [1243]:
priority_df[["asset_type","CEI"]] = priority_df["asset_type_cei"].apply(lambda x: pandas.Series(x.split(":")))

In [1244]:
priority_df[["host_type","accessibility"]] = priority_df["asset_type"].apply(lambda x: pandas.Series(x.split("_")))

In [1245]:
priority_df = priority_df[["host_type","accessibility","CEI","priority_score"]]

In [1246]:
priority_df["priority_rank"] = priority_df["priority_score"].rank(ascending=False,method="min")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  priority_df["priority_rank"] = priority_df["priority_score"].rank(ascending=False,method="min")


In [1247]:
priority_df

Unnamed: 0,host_type,accessibility,CEI,priority_score,priority_rank
0,Servers,External,EOL OS,4.458506,1.0
1,Servers,Internal,EOL OS,2.104977,2.0
2,Workstation,,EOL OS,0.951198,3.0
3,Servers,External,Weekly,0.928923,4.0
4,Servers,External,Fortnightly,0.9279,5.0
5,Servers,External,Daily,0.85572,6.0
6,Servers,Internal,Weekly,0.753415,7.0
7,Servers,Internal,Fortnightly,0.752742,8.0
8,Servers,Internal,Daily,0.704533,9.0
9,Workstation,,Weekly,0.525339,10.0


In [1248]:
priority_df.columns = ["host_type","accessibility","cei_code","priority_score","priority_rank"]

In [1249]:
#priority_df.to_csv("priority.csv",index=False)

# CEI Metric Data

In [1250]:
# cei_data = cei_metric.fillna("").groupby(["host_type","accessibility","cei_code"])["cei_status"].value_counts(normalize=True)
# cei_data = cei_data.to_frame()
# cei_data.columns = ["failure_count"]
# cei_data = cei_data.reset_index()

In [1251]:
# cei_data = cei_data[cei_data["cei_status"] == "Failed"]

In [1252]:
# set(cei_data.columns).intersection(priority_df.columns)

{'accessibility', 'cei_code', 'host_type'}

In [1253]:
# cei_data

Unnamed: 0,host_type,accessibility,cei_code,cei_status,failure_count
1,Server,External,Daily,Failed,0.25
4,Server,External,Weekly,Failed,0.0875
7,Server,Internal,Weekly,Failed,0.25
9,Workstation,,EOL OS,Failed,0.321429
11,Workstation,,Weekly,Failed,0.178571


In [1254]:
#priority_df

Unnamed: 0,host_type,accessibility,cei_code,priority_score,priority_rank
0,Servers,External,EOL OS,4.458506,1.0
1,Servers,Internal,EOL OS,2.104977,2.0
2,Workstation,,EOL OS,0.951198,3.0
3,Servers,External,Weekly,0.928923,4.0
4,Servers,External,Fortnightly,0.9279,5.0
5,Servers,External,Daily,0.85572,6.0
6,Servers,Internal,Weekly,0.753415,7.0
7,Servers,Internal,Fortnightly,0.752742,8.0
8,Servers,Internal,Daily,0.704533,9.0
9,Workstation,,Weekly,0.525339,10.0


In [1255]:
#priority_df["host_type"] = priority_df["host_type"].str.replace("Servers","Server")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  priority_df["host_type"] = priority_df["host_type"].str.replace("Servers","Server")


In [1256]:
#cei_data = cei_data.fillna("").merge(priority_df.fillna(""),on=['accessibility', 'cei_code', 'host_type'],how="left")

In [1257]:
#cei_data["effective_score"] = cei_data["failure_count"] * cei_data["priority_score"]

In [1258]:
#cei_data["effective_rank"] = cei_data["effective_score"].rank(ascending=False)

In [1259]:
#cei_data = cei_data.sort_values("effective_rank")

In [1260]:
#cei_data.to_csv("effective_rank.csv",index=False)

In [1261]:
#output  = cei_data[["host_type","accessibility","cei_code","failure_count","priority_rank","effective_rank"]]

In [1262]:
#cei_data

Unnamed: 0,host_type,accessibility,cei_code,cei_status,failure_count,priority_score,priority_rank,effective_score,effective_rank
3,Workstation,,EOL OS,Failed,0.321429,0.951198,3.0,0.305742,1.0
0,Server,External,Daily,Failed,0.25,0.85572,6.0,0.21393,2.0
2,Server,Internal,Weekly,Failed,0.25,0.753415,7.0,0.188354,3.0
4,Workstation,,Weekly,Failed,0.178571,0.525339,10.0,0.093811,4.0
1,Server,External,Weekly,Failed,0.0875,0.928923,4.0,0.081281,5.0


In [1183]:
#risk

Unnamed: 0_level_0,Fortnightly,Weekly,Daily,EOL OS
asset_class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Servers_External,16,12,8,20
Servers_Internal,12,9,6,15
Workstation,12,9,6,15


In [1184]:
#priority_df.to_csv("priority_score.csv",index=False)