In [1]:
import pandas as pd
from collections import defaultdict

In [2]:
df = pd.read_csv("data.csv")
reqd = ["ResourceGroupId", "Feature", "CategoryName", "VerificationResult", "ControlStringId"]
df = df[reqd]

In [3]:
df.head()


Unnamed: 0,ResourceGroupId,Feature,CategoryName,VerificationResult,ControlStringId
0,2,SQLDatabase,Storage,Verify,Azure_SQLDatabase_AuthZ_Firewall_Deny_Access_A...
1,2,SQLDatabase,DataProcessing,Verify,Azure_SQLDatabase_AuthZ_Firewall_Deny_Access_A...
2,2,SQLDatabase,Reporting,Verify,Azure_SQLDatabase_AuthZ_Firewall_Deny_Access_A...
3,2,AppService,Web Front End,Verify,Azure_AppService_AuthZ_Grant_Min_RBAC_Access
4,2,AppService,APIs,Verify,Azure_AppService_AuthZ_Grant_Min_RBAC_Access


In [4]:
# Create combination dict
feature_combinations = defaultdict(set)
for idx, row in df.iterrows():
    feature_combinations[row["ResourceGroupId"]].add(row["Feature"])

In [5]:
feature_combinations

defaultdict(set,
            {2: {'AppService',
              'Automation',
              'EventHub',
              'KeyVault',
              'SQLDatabase',
              'Storage',
              'StreamAnalytics'},
             6: {'AppService',
              'Automation',
              'EventHub',
              'KeyVault',
              'SQLDatabase',
              'Storage',
              'StreamAnalytics'},
             8: {'AppService', 'KeyVault', 'LogicApps', 'Storage'},
             260: {'AppService', 'EventHub', 'KeyVault', 'Storage'},
             261: {'AppService', 'EventHub', 'KeyVault', 'Storage'},
             361: {'AppService', 'KeyVault', 'LogicApps', 'Storage'},
             362: {'AppService',
              'Automation',
              'EventHub',
              'KeyVault',
              'SQLDatabase',
              'Storage',
              'StreamAnalytics'},
             363: {'AppService', 'KeyVault', 'LogicApps', 'Storage'},
             364: {'AppService',
     

In [6]:
failures = defaultdict(dict)
for idx, row in df.iterrows():
    totals = failures[row["ResourceGroupId"]].setdefault("Totals", 0)
    fails = failures[row["ResourceGroupId"]].setdefault("Fails", 0)
    success = failures[row["ResourceGroupId"]].setdefault("Success", 0)
    failures[row["ResourceGroupId"]]["Totals"] = totals + 1 
    if row["VerificationResult"] == "Passed":
        failures[row["ResourceGroupId"]]["Success"] = success + 1
    else:
        failures[row["ResourceGroupId"]]["Fails"] = fails + 1

In [7]:
feature_hash_map = {
    "SQLDatabase": 3940427,
    "AppService": 3940763,
    "StreamAnalytics": 1414297,
    "KeyVault": 3125831,
    "Storage": 5392313,
    "Automation": 6305339,
    "EventHub": 7368719,
    "LogicApps": 7368629,
    "TrafficManager": 7368787,
    "VirtualNetwork": 2523893,
    "DataLakeStore": 4284113,
    "CosmosDB": 5602973,
    "RedisCache": 5603713,
}

category_hash_map = {
    "Storage" : 1000003,
    "DataProcessing" : 1000033,
    "Reporting" : 1000037,
    "Web Front End" : 1000039,
    "APIs" : 1000081,
    "Security Infra" : 1000099,
    "SubscriptionCore" : 1000117,
    "Commuincation Hub" : 1000121,
    "Hybrid" : 1000133,
    "Network Isolation" : 1000151,
    "Cache" : 1000159
}

parent_map = {
    "AppService": ["Web Front End", "APIs"],
    "SQLDatabase": ["Storage", "DataProcessing", "Reporting"],
    "Storage": ["Storage", "Reporting", "DataProcessing"],
    "LogicApps": ["DataProcessing"],
    "DataFactory": ["DataProcessing"],
    "DataLakeAnalytics": ["DataProcessing", "Reporting"],
    "DataLakeStore": ["Storage", "Reporting", "DataProcessing"],
    "NotificationHub": ["Commuincation Hub"],
    "ServiceFabric": ["Web Front End", "APIs", "Backend Processing"],
    "Search": ["APIs", "Backend Processing"],
    "VirtualMachine": ["Web Front End", "APIs", "Backend Processing", "DataProcessing"],
    "VirtualNetwork": ["Network Isolation", "Hybrid"],
    "AnalysisServices": ["DataProcessing", "Reporting"],
    "Batch": ["Backend Processing"],
    "RedisCache": ["Cache"],
    "EventHub": ["Commuincation Hub", "Hybrid"],
    "ODG": ["Hybrid"],
    "TrafficManager": ["Network Isolation"],
    "ERvNet": ["Hybrid", "Network Isolation"],
    "Automation": ["SubscriptionCore"],
    "CosmosDB": ["Storage", "DataProcessing", "Reporting"],
    "StreamAnalytics": ["DataProcessing", "Reporting"],
    "CloudService": ["Web Front End", "APIs", "Backend Processing"],
    "LoadBalancer": ["Network Isolation"],
    "APIConnection": ["DataProcessing"],
    "BotService": ["APIs", "Commuincation Hub", "Web Front End"],
    "ContainerInstances": ["Web Front End", "APIs", "DataProcessing", "Backend Processing"],
    "DataFactoryV2": ["DataProcessing", "Backend Processing"],
    "KeyVault": ["Security Infra"]
}

BIG_PRIME = 824633720831

def get_feature_hash(features):
    hash_val = 1
    for feature in features:
        hash_val *= feature_hash_map[feature]
        hash_val %= BIG_PRIME
    return hash_val

def get_category_hash(categories):
    hash_val = 1
    for category in categories:
        hash_val *= category_hash_map[category]
        hash_val %= BIG_PRIME
    return hash_val

def get_parents_list(features):
    parents = []
    for feature in features:
        parents.append(parent_map[feature][0])
    return parents

In [8]:
# df["Feature"].unique()
get_parents_list(['AppService', 'StreamAnalytics', 'KeyVault', 'Storage', 'SQLDatabase'])

['Web Front End', 'DataProcessing', 'Security Infra', 'Storage', 'Storage']

In [9]:
get_feature_hash(['AppService', 'StreamAnalytics', 'KeyVault', 'Storage', 'SQLDatabase'])

397933297130

In [10]:
master_hash_table = dict()
for res_id in feature_combinations:
    features = feature_combinations[res_id]
    feature_hash = get_feature_hash(features)
    int_list = master_hash_table.setdefault(feature_hash, {"features": features, "counts": 0, "info": failures[res_id]})
    int_list["counts"] += 1
    # master_hash_table[feature_hash] = int_list

In [11]:
for x in master_hash_table:
    print(master_hash_table[x])
    print("*" * 100)

{'features': {'KeyVault', 'Storage', 'SQLDatabase', 'AppService', 'Automation', 'EventHub', 'StreamAnalytics'}, 'counts': 6, 'info': {'Totals': 709, 'Fails': 448, 'Success': 261}}
****************************************************************************************************
{'features': {'KeyVault', 'Storage', 'LogicApps', 'AppService'}, 'counts': 6, 'info': {'Totals': 285, 'Fails': 193, 'Success': 92}}
****************************************************************************************************
{'features': {'KeyVault', 'Storage', 'AppService', 'EventHub'}, 'counts': 2, 'info': {'Totals': 363, 'Fails': 221, 'Success': 142}}
****************************************************************************************************
{'features': {'Automation', 'EventHub'}, 'counts': 1, 'info': {'Totals': 34, 'Fails': 31, 'Success': 3}}
****************************************************************************************************
{'features': {'TrafficManager'}, 'counts': 5, '

In [12]:
list_ = ['StreamAnalytics', 'Storage', 'Automation']
master_category_table = dict()
updated = False

def recurse(my_list, hash_cache, info, string_cache):
    global updated
    if my_list:
        for parent in parent_map[my_list[0]]:
            recurse(my_list[1:], (hash_cache * category_hash_map[parent]) % BIG_PRIME, info, parent + " -> " + string_cache)
    else:
        to_insert = dict()
        if hash_cache in master_category_table and not updated:
            # ADD VALUES
            previous_info = master_category_table[hash_cache]
            to_insert["Totals"] = previous_info["Totals"] + info["Totals"]
            to_insert["Fails"] = previous_info["Fails"] + info["Fails"]
            to_insert["Success"] = previous_info["Success"] + info["Success"]
        else:
            # FIRST TIME
            to_insert["Totals"] = info["Totals"]
            to_insert["Fails"] = info["Fails"]
            to_insert["Success"] = info["Success"]
        master_category_table[hash_cache] = to_insert
        updated = True
        print("Category combination: {}".format(string_cache))
        print("*" * 50)
    print("#" * 70)

# recurse(list_, 1, {'Totals': 363, 'Fails': 221, 'Success': 142}, "")

In [13]:
for x in master_hash_table:
    global updated
    updated = False
    recurse(list(master_hash_table[x]["features"]), 1, master_hash_table[x]["info"], "")

Category combination: DataProcessing -> Commuincation Hub -> SubscriptionCore -> Web Front End -> Storage -> Storage -> Security Infra -> 
**************************************************
######################################################################
Category combination: Reporting -> Commuincation Hub -> SubscriptionCore -> Web Front End -> Storage -> Storage -> Security Infra -> 
**************************************************
######################################################################
######################################################################
Category combination: DataProcessing -> Hybrid -> SubscriptionCore -> Web Front End -> Storage -> Storage -> Security Infra -> 
**************************************************
######################################################################
Category combination: Reporting -> Hybrid -> SubscriptionCore -> Web Front End -> Storage -> Storage -> Security Infra -> 
***************************************

In [14]:
master_category_table

{552830095053: {'Totals': 709, 'Fails': 448, 'Success': 261},
 119502410524: {'Totals': 709, 'Fails': 448, 'Success': 261},
 820705546872: {'Totals': 709, 'Fails': 448, 'Success': 261},
 241144864749: {'Totals': 709, 'Fails': 448, 'Success': 261},
 228905291221: {'Totals': 709, 'Fails': 448, 'Success': 261},
 551646474478: {'Totals': 709, 'Fails': 448, 'Success': 261},
 332285631080: {'Totals': 709, 'Fails': 448, 'Success': 261},
 694337869333: {'Totals': 709, 'Fails': 448, 'Success': 261},
 174985799197: {'Totals': 709, 'Fails': 448, 'Success': 261},
 234557722527: {'Totals': 709, 'Fails': 448, 'Success': 261},
 629846176186: {'Totals': 709, 'Fails': 448, 'Success': 261},
 714349422872: {'Totals': 709, 'Fails': 448, 'Success': 261},
 779691614501: {'Totals': 709, 'Fails': 448, 'Success': 261},
 303373977051: {'Totals': 709, 'Fails': 448, 'Success': 261},
 325773591087: {'Totals': 709, 'Fails': 448, 'Success': 261},
 105002993533: {'Totals': 709, 'Fails': 448, 'Success': 261},
 4698007

In [19]:
test_features = ["Storage", "Automation"]
master_hash_table[get_feature_hash(test_features)]

{'features': {'Automation', 'Storage'},
 'counts': 1,
 'info': {'Totals': 28, 'Fails': 21, 'Success': 7}}

In [20]:
master_category_table[get_category_hash(get_parents_list(test_features))]

{'Totals': 28, 'Fails': 21, 'Success': 7}

In [27]:
def get_feature_safety(features):
    print("Features: {}".format(features))
    feature_info = master_hash_table[get_feature_hash(features)]
    print("Possible Parents: {}".format(get_parents_list(features)))
    category_info = master_category_table[get_category_hash(get_parents_list(features))]
    print("Feature info: {}".format(feature_info["info"]))
    print("Category info: {}".format(category_info))

In [28]:
get_feature_safety(["Storage", "Automation"])

Features: ['Storage', 'Automation']
Possible Parents: ['Storage', 'SubscriptionCore']
Feature info: {'Totals': 28, 'Fails': 21, 'Success': 7}
Category info: {'Totals': 28, 'Fails': 21, 'Success': 7}
