In [1]:
import pandas as pd
from collections import defaultdict

In [2]:
df = pd.read_csv("data.csv")
reqd = ["ResourceGroupId", "Feature", "CategoryName", "VerificationResult", "ControlStringId"]
df = df[reqd]

In [3]:
df.head()


Unnamed: 0,ResourceGroupId,Feature,CategoryName,VerificationResult,ControlStringId
0,2,SQLDatabase,Storage,Verify,Azure_SQLDatabase_AuthZ_Firewall_Deny_Access_A...
1,2,SQLDatabase,DataProcessing,Verify,Azure_SQLDatabase_AuthZ_Firewall_Deny_Access_A...
2,2,SQLDatabase,Reporting,Verify,Azure_SQLDatabase_AuthZ_Firewall_Deny_Access_A...
3,2,AppService,Web Front End,Verify,Azure_AppService_AuthZ_Grant_Min_RBAC_Access
4,2,AppService,APIs,Verify,Azure_AppService_AuthZ_Grant_Min_RBAC_Access


In [4]:
# Create combination dict
feature_combinations = defaultdict(set)
for idx, row in df.iterrows():
    feature_combinations[row["ResourceGroupId"]].add(row["Feature"])

In [5]:
feature_combinations

defaultdict(set,
            {2: {'AppService',
              'Automation',
              'EventHub',
              'KeyVault',
              'SQLDatabase',
              'Storage',
              'StreamAnalytics'},
             6: {'AppService',
              'Automation',
              'EventHub',
              'KeyVault',
              'SQLDatabase',
              'Storage',
              'StreamAnalytics'},
             8: {'AppService', 'KeyVault', 'LogicApps', 'Storage'},
             260: {'AppService', 'EventHub', 'KeyVault', 'Storage'},
             261: {'AppService', 'EventHub', 'KeyVault', 'Storage'},
             361: {'AppService', 'KeyVault', 'LogicApps', 'Storage'},
             362: {'AppService',
              'Automation',
              'EventHub',
              'KeyVault',
              'SQLDatabase',
              'Storage',
              'StreamAnalytics'},
             363: {'AppService', 'KeyVault', 'LogicApps', 'Storage'},
             364: {'AppService',
     

In [6]:
failures = defaultdict(dict)
for idx, row in df.iterrows():
    totals = failures[row["ResourceGroupId"]].setdefault("Totals", 0)
    fails = failures[row["ResourceGroupId"]].setdefault("Fails", 0)
    success = failures[row["ResourceGroupId"]].setdefault("Success", 0)
    failures[row["ResourceGroupId"]]["Totals"] = totals + 1 
    if row["VerificationResult"] == "Passed":
        failures[row["ResourceGroupId"]]["Success"] = success + 1
    else:
        failures[row["ResourceGroupId"]]["Fails"] = fails + 1

In [7]:
feature_hash_map = {
    "SQLDatabase": 3940427,
    "AppService": 3940763,
    "StreamAnalytics": 1414297,
    "KeyVault": 3125831,
    "Storage": 5392313,
    "Automation": 6305339,
    "EventHub": 7368719,
    "LogicApps": 7368629,
    "TrafficManager": 7368787,
    "VirtualNetwork": 2523893,
    "DataLakeStore": 4284113,
    "CosmosDB": 5602973,
    "RedisCache": 5603713,
}

category_hash_map = {
    "Storage" : 1000003,
    "DataProcessing" : 1000033,
    "Reporting" : 1000037,
    "Web Front End" : 1000039,
    "APIs" : 1000081,
    "Security Infra" : 1000099,
    "SubscriptionCore" : 1000117,
    "Commuincation Hub" : 1000121,
    "Hybrid" : 1000133,
    "Network Isolation" : 1000151,
    "Cache" : 1000159,
    "Backend Processing": 123123593,
}

parent_map = {
    "AppService": ["Web Front End", "APIs"],
    "SQLDatabase": ["Storage", "DataProcessing", "Reporting"],
    "Storage": ["Storage", "Reporting", "DataProcessing"],
    "LogicApps": ["DataProcessing"],
    "DataFactory": ["DataProcessing"],
    "DataLakeAnalytics": ["DataProcessing", "Reporting"],
    "DataLakeStore": ["Storage", "Reporting", "DataProcessing"],
    "NotificationHub": ["Commuincation Hub"],
    "ServiceFabric": ["Web Front End", "APIs", "Backend Processing"],
    "Search": ["APIs", "Backend Processing"],
    "VirtualMachine": ["Web Front End", "APIs", "Backend Processing", "DataProcessing"],
    "VirtualNetwork": ["Network Isolation", "Hybrid"],
    "AnalysisServices": ["DataProcessing", "Reporting"],
    "Batch": ["Backend Processing"],
    "RedisCache": ["Cache"],
    "EventHub": ["Commuincation Hub", "Hybrid"],
    "ODG": ["Hybrid"],
    "TrafficManager": ["Network Isolation"],
    "ERvNet": ["Hybrid", "Network Isolation"],
    "Automation": ["Backend Processing"],
    "CosmosDB": ["Storage", "DataProcessing", "Reporting"],
    "StreamAnalytics": ["DataProcessing", "Reporting"],
    "CloudService": ["Web Front End", "APIs", "Backend Processing"],
    "LoadBalancer": ["Network Isolation"],
    "APIConnection": ["DataProcessing"],
    "BotService": ["APIs", "Commuincation Hub", "Web Front End"],
    "ContainerInstances": ["Web Front End", "APIs", "DataProcessing", "Backend Processing"],
    "DataFactoryV2": ["DataProcessing", "Backend Processing"],
    "KeyVault": ["Security Infra"]
}

BIG_PRIME = 824633720831

def get_feature_hash(features):
    hash_val = 1
    for feature in features:
        hash_val *= feature_hash_map[feature]
        hash_val %= BIG_PRIME
    return hash_val

def get_category_hash(categories):
    hash_val = 1
    for category in categories:
        hash_val *= category_hash_map[category]
        hash_val %= BIG_PRIME
    return hash_val

def get_parents_list(features):
    parents = []
    for feature in features:
        parents.append(parent_map[feature][0])
    return parents

In [8]:
# df["Feature"].unique()
get_parents_list(['AppService', 'StreamAnalytics', 'KeyVault', 'Storage', 'SQLDatabase'])

['Web Front End', 'DataProcessing', 'Security Infra', 'Storage', 'Storage']

In [9]:
get_feature_hash(['AppService', 'StreamAnalytics', 'KeyVault', 'Storage', 'SQLDatabase'])

397933297130

In [10]:
master_hash_table = dict()
for res_id in feature_combinations:
    features = feature_combinations[res_id]
    feature_hash = get_feature_hash(features)
    int_list = master_hash_table.setdefault(feature_hash, {"features": features, "counts": 0, "info": failures[res_id]})
    int_list["counts"] += 1
    # master_hash_table[feature_hash] = int_list

In [11]:
for x in master_hash_table:
    print(master_hash_table[x])
    print("*" * 100)

{'features': {'StreamAnalytics', 'AppService', 'SQLDatabase', 'EventHub', 'Storage', 'KeyVault', 'Automation'}, 'counts': 6, 'info': {'Totals': 709, 'Fails': 448, 'Success': 261}}
****************************************************************************************************
{'features': {'AppService', 'Storage', 'KeyVault', 'LogicApps'}, 'counts': 6, 'info': {'Totals': 285, 'Fails': 193, 'Success': 92}}
****************************************************************************************************
{'features': {'AppService', 'KeyVault', 'EventHub', 'Storage'}, 'counts': 2, 'info': {'Totals': 363, 'Fails': 221, 'Success': 142}}
****************************************************************************************************
{'features': {'EventHub', 'Automation'}, 'counts': 1, 'info': {'Totals': 34, 'Fails': 31, 'Success': 3}}
****************************************************************************************************
{'features': {'TrafficManager'}, 'counts': 5, '

In [17]:
list_ = ['StreamAnalytics', 'Storage', 'Automation']
master_category_table = dict()
updated = False
parent_feature_combo_table = defaultdict(list)


def recurse(my_list, hash_cache, info, string_cache, feature_info):
    global updated
    if my_list:
        for parent in parent_map[my_list[0]]:
            recurse(my_list[1:], (hash_cache * category_hash_map[parent]) % BIG_PRIME, info, parent + " -> " + string_cache,
                    feature_info)
    else:
        to_insert = dict()
        if hash_cache in master_category_table and not updated:
            # ADD VALUES
            previous_info = master_category_table[hash_cache]
            to_insert["Totals"] = previous_info["Totals"] + info["Totals"]
            to_insert["Fails"] = previous_info["Fails"] + info["Fails"]
            to_insert["Success"] = previous_info["Success"] + info["Success"]
        else:
            # FIRST TIME
            to_insert["Totals"] = info["Totals"]
            to_insert["Fails"] = info["Fails"]
            to_insert["Success"] = info["Success"]
        master_category_table[hash_cache] = to_insert
        updated = True
        parents = string_cache.split(" -> ")[:-1]
        parents_hash = get_category_hash(parents)
        parent_feature_combo_table[parents_hash].append(feature_info)
        print("Category combination: {}".format(string_cache))
        print("*" * 50)
    print("#" * 70)

# recurse(list_, 1, {'Totals': 363, 'Fails': 221, 'Success': 142}, "")

In [18]:
for x in master_hash_table:
    global updated
    updated = False
    feature_info = {
        "features": list(master_hash_table[x]["features"]),
        "info": master_hash_table[x]["info"]
    }
    recurse(list(master_hash_table[x]["features"]), 1, master_hash_table[x]["info"], "", feature_info)

Category combination: Backend Processing -> Security Infra -> Storage -> Commuincation Hub -> Storage -> Web Front End -> DataProcessing -> 
**************************************************
######################################################################
######################################################################
######################################################################
Category combination: Backend Processing -> Security Infra -> Reporting -> Commuincation Hub -> Storage -> Web Front End -> DataProcessing -> 
**************************************************
######################################################################
######################################################################
######################################################################
Category combination: Backend Processing -> Security Infra -> DataProcessing -> Commuincation Hub -> Storage -> Web Front End -> DataProcessing -> 
****************************************

In [14]:
master_category_table

{113166132900: {'Totals': 709, 'Fails': 448, 'Success': 261},
 68872439214: {'Totals': 709, 'Fails': 448, 'Success': 261},
 365130657588: {'Totals': 709, 'Fails': 448, 'Success': 261},
 236601047911: {'Totals': 709, 'Fails': 448, 'Success': 261},
 292005345598: {'Totals': 709, 'Fails': 448, 'Success': 261},
 91455729204: {'Totals': 709, 'Fails': 448, 'Success': 261},
 802699008668: {'Totals': 709, 'Fails': 448, 'Success': 261},
 508681049473: {'Totals': 709, 'Fails': 448, 'Success': 261},
 233072762726: {'Totals': 709, 'Fails': 448, 'Success': 261},
 458951264909: {'Totals': 709, 'Fails': 448, 'Success': 261},
 460737900152: {'Totals': 709, 'Fails': 448, 'Success': 261},
 170239503621: {'Totals': 709, 'Fails': 448, 'Success': 261},
 125603403433: {'Totals': 709, 'Fails': 448, 'Success': 261},
 613456221099: {'Totals': 709, 'Fails': 448, 'Success': 261},
 798601101618: {'Totals': 709, 'Fails': 448, 'Success': 261},
 469217891605: {'Totals': 709, 'Fails': 448, 'Success': 261},
 781883711

In [15]:
test_features = ['AppService', 'Storage']
master_hash_table[get_feature_hash(test_features)]

KeyError: 633984534044

In [18]:
master_category_table[get_category_hash(get_parents_list(test_features))]

KeyError: 175408279286

In [27]:
def get_feature_safety(features):
    print("Features: {}".format(features))
    feature_info = master_hash_table[get_feature_hash(features)]
    print("Possible Parents: {}".format(get_parents_list(features)))
    category_info = master_category_table[get_category_hash(get_parents_list(features))]
    print("Feature info: {}".format(feature_info["info"]))
    print("Category info: {}".format(category_info))
    print("Fail percentage: {0:.2f}%".format(feature_info["info"]["Fails"] / feature_info["info"]["Totals"] * 100))

In [28]:
get_feature_safety(test_features)

Features: ['KeyVault', 'AppService', 'Storage', 'EventHub']
Possible Parents: ['Security Infra', 'Web Front End', 'Storage', 'Commuincation Hub']
Feature info: {'Totals': 363, 'Fails': 221, 'Success': 142}
Category info: {'Totals': 363, 'Fails': 221, 'Success': 142}
Fail percentage: 60.88%


In [19]:
parent_feature_combo_table

defaultdict(list,
            {113166132900: [{'features': ['StreamAnalytics',
                'AppService',
                'SQLDatabase',
                'EventHub',
                'Storage',
                'KeyVault',
                'Automation'],
               'info': {'Totals': 709, 'Fails': 448, 'Success': 261}}],
             68872439214: [{'features': ['StreamAnalytics',
                'AppService',
                'SQLDatabase',
                'EventHub',
                'Storage',
                'KeyVault',
                'Automation'],
               'info': {'Totals': 709, 'Fails': 448, 'Success': 261}},
              {'features': ['StreamAnalytics',
                'AppService',
                'SQLDatabase',
                'EventHub',
                'Storage',
                'KeyVault',
                'Automation'],
               'info': {'Totals': 709, 'Fails': 448, 'Success': 261}},
              {'features': ['StreamAnalytics',
                'AppService'

In [24]:
"Backend Processing -> Security Infra -> Storage -> Hybrid -> Reporting -> APIs -> DataProcessing -> ".split(" -> ")

['Backend Processing',
 'Security Infra',
 'Storage',
 'Hybrid',
 'Reporting',
 'APIs',
 'DataProcessing',
 '']

In [27]:
def score(value):
    num = value["info"]["Fails"]
    den = value["info"]["Totals"]
    return num / den

    
def get_safest_feature(categories):
    parent_hash = get_category_hash(categories)
    value = parent_feature_combo_table[parent_hash]
    print("Combos: {}".format(value))
    best_feature = None
    for x in value:
        if best_feature is None or score(x) < score(best_feature):
            best_feature = x
    return best_feature

parents_list = ["Network Isolation"]

get_safest_feature(parents_list)

Combos: [{'features': ['TrafficManager'], 'info': {'Totals': 2, 'Fails': 1, 'Success': 1}}, {'features': ['VirtualNetwork'], 'info': {'Totals': 132, 'Fails': 24, 'Success': 108}}]


{'features': ['VirtualNetwork'],
 'info': {'Totals': 132, 'Fails': 24, 'Success': 108}}