In [305]:
# https://bokeh.pydata.org/en/latest/docs/user_guide/categorical.html
# https://www.programcreek.com/python/example/106843/bokeh.models.HoverTool
# https://towardsdatascience.com/different-ways-to-iterate-over-rows-in-a-pandas-dataframe-performance-comparison-dc0d5dcef8fe
from bokeh.io import show, output_notebook
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter, HoverTool
from bokeh.palettes import Spectral6 as colorcat
from bokeh.plotting import figure
from bokeh.transform import transform
import random as rand
import pandas as pd
import numpy as np

output_notebook()

In [306]:
probabilities = ['None', 'Low', 'Medium', 'High', 'Critical']
impacts= ['None', 'Low', 'Medium', 'High', 'Critical']

probmap={'None':1, 'Low':3, 'Medium':4, 'High':5, 'Critical':5}
impactmap={'None':1, 'Low':4, 'Medium':5, 'High':18, 'Critical':20}

dict2=[(probability,impact,probmap[probability]*impactmap[impact]) for probability in probabilities for impact in impacts]

In [307]:
# dict1={"fruits":fruits,"regions":regions,"fruitvalues":xvals, "regionvals":yvals}
df=pd.DataFrame(dict2)
df=df.rename(columns={0:"probabilities",1:"impacts",2:"imputedweights"})
colsource=ColumnDataSource(df)

In [308]:
colors = colorcat # ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]

toolKit = "pan,wheel_zoom,box_zoom,reset"
mapper = LinearColorMapper(palette=colors, low=min(df.imputedweights.values), high=max(df.imputedweights.values))
p = figure(x_range=probabilities, y_range=impacts, 
           plot_width=400, plot_height=400, 
           title="Risk Heat Map",
           tools=toolKit,
           x_axis_label="Frequency",
           y_axis_label="Impact"
          )
p.rect(x="probabilities", y="impacts",source=colsource, width=1, height=1, fill_color=transform("imputedweights",mapper))



# hoverTool=HoverTool(
#         tooltips="""
#         <div>
#             <div>
#                 <h3>Weight = @imputedweights</h3>
#             </div>
#         </div>
#         """)

# p.add_tools(hoverTool)



In [309]:
# Get ContiNube Controls dataset
dataFrame=pd.read_csv('out.csv').drop('Unnamed: 0', axis=1).sort_values(by=['cumnum']).reindex()
dataFrame['computedscore']=-1  # between 0-100. weights * compliance%. 
dataFrame['imputedscore']=-1 # between 0-100. To be defined
dataFrame['computedweight']=-1 # between 1-10. Weight this is computed for non-leaf levels

# is there a better way to do both assignments at once
def assignLeafWeight(row):
        return float(format(row['weight'],'0.5f'))
dataFrame.loc[:,'computedweight']=dataFrame[dataFrame['leaf']>0].apply(assignLeafWeight,axis=1)

def assignScoreWeight(row):
        return float(format(row['compliance_pct'],'0.5f'))
dataFrame.loc[:,'computedscore']=dataFrame[dataFrame['leaf']>0].apply(assignScoreWeight,axis=1)

cols=['cumnum','level','control_name','computedscore','imputedscore','severity','impact','frequency','weight', 'computedweight']
controldomains=dataFrame[dataFrame['root']==1][cols]
controldomains=controldomains.reindex()


In [310]:
controldomains

Unnamed: 0,cumnum,level,control_name,computedscore,imputedscore,severity,impact,frequency,weight,computedweight
0,10000,1,access control,,-1,,,,-1,
18,20000,1,awareness and training,,-1,,,,-1,
23,30000,1,audit and accountability,,-1,,,,-1,
31,40000,1,security assessment and authorization,,-1,,,,-1,
41,50000,1,configuration management,,-1,,,,-1,
51,60000,2,cm-08-(03) information system component invent...,,-1,,,,-1,
52,60000,1,contingency planning,,-1,,,,-1,
58,70000,1,identification and authentication,,-1,,,,-1,
63,80000,1,incident response,,-1,,,,-1,
70,90000,1,maintainence,,-1,,,,-1,


In [325]:
# computeScore():
# inputs: control cumnum range: start, end, level
# outputs: computedscore, computedweight
# for all controls cumsum >start and <end at the same level
#     if computedscore <0 # not assigned. move recursive to the next level
#        find the start and end of its next level
#        call computeScore() with newstart, newend, newlevel
#        computedscore, computedweight = return() 
#     cumcount = cumcount + 1
#     cumweight = cumweight + computedweight
#     cumscore = cumscore + (computedscore * computedweight); 
#  return cumscore/cumweight, cumweight/cumcount


# Calculate score for each leaf node in dataFrame
def computeScore(after_cumnum, before_cumnum, curlevel):
    # return [cumcount, score, weight]
    computedscore=0
    computedweight=0
    compcount=0
    #find all controls within the range for the current hierarchy level
    tempdf=dataFrame[((dataFrame['cumnum']>after_cumnum) & (dataFrame['cumnum']<before_cumnum) & (dataFrame['level']==curlevel))].sort_values(by=['cumnum'])
    cniterator=tempdf.iterrows()
    for (recidx, rec) in cniterator:
        if (rec['computedscore']<0): # computed score is not assigned. call recursive
            # find start and end range and the next level
            new_curlevel=curlevel+1
            new_after_cumnum=rec['cumnum']
            try:
                nextidx, nextrec=next(cniterator)
                new_before_cumnum=nextrec['cumnum']
            except StopIteration: # last in the loop. Need to find the next major number
                new_before_cumnum=before_cumnum
            retdict =computeScore(new_after_cumnum,new_before_cumnum,new_curlevel)
            computedscore=retdict['computedscore']
            computedweight=retdict['computedweight']
        else:
            computedscore=rec['computedscore']
            computedweight=rec['computedweight']
        
        computedscore=+computedscore
        computedweight=+computedweight
        compcount=+1
    print(after_cumnum, before_cumnum, curlevel)
    print(compcount, computedscore, computedweight)
    if computedweight==0 | compcount==0:
        return dict({'computedscore':0, 'computedweight':0})
    return dict({'computedscore':computedscore/computedweight, 'computedweight':computedweight/compcount})

after_cumnum=20000
before_cumnum=30000
curlevel=2
retval=computeScore(after_cumnum,before_cumnum,curlevel)



20000 30000 2
1 56.0 5.0


In [326]:
after_cumnum=20000
before_cumnum=30000
curlevel=2
# print(computeScore(after_cumnum,before_cumnum,curlevel)
dataFrame[((dataFrame['cumnum']>after_cumnum) & (dataFrame['cumnum']<before_cumnum) & (dataFrame['level']==curlevel))].sort_values(by=['cumnum'])



Unnamed: 0,cumnum,number,level,control_name,root,leaf,compliance_pct,compliance_status,severity,impact,frequency,weight,computedscore,imputedscore,computedweight
19,21000,2.1,2,at-01 security awareness and training procedures,0,1,57,fail,high,high,high,7,57.0,-1,7.0
20,22000,2.2,2,at-02 security awareness training,0,1,100,pass,medium,medium,high,5,100.0,-1,5.0
21,23000,2.3,2,at-03 role-based security training,0,1,100,pass,critical,critical,critical,10,100.0,-1,10.0
22,24000,2.4,2,at-04 security training records,0,1,56,fail,medium,medium,high,5,56.0,-1,5.0


In [322]:
tempdf

Unnamed: 0,cumnum,number,level,control_name,root,leaf,compliance_pct,compliance_status,severity,impact,frequency,weight,computedscore,imputedscore,computedweight
1,11000,1.1,2,ac-01 access control policy and procedures,0,1,-1,non-determinant,high,high,high,7,-1.0,-1,7.0
2,12000,1.2,2,ac-02 account management,0,0,-1,,,,,-1,,-1,


In [273]:
cniterator=tempdf.iterrows()
for recidx, rec in cniterator:
    try:
        nextidx, nextrec=next(cniterator)
    except StopIteration:
        print("stop iteration")
    print(recidx, len(tempdf), "current one", rec['cumnum'],"next one", nextrec['cumnum'])

    

3 13 current one 12100 next one 12200
5 13 current one 12300 next one 12400
7 13 current one 12500 next one 12600
9 13 current one 12700 next one 12800
11 13 current one 12900 next one 13000
15 13 current one 13100 next one 13200
stop iteration
17 13 current one 13300 next one 13200


In [231]:
show(p)