In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
from sklearn.model_selection import cross_val_score

from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from textblob import TextBlob, Word

# Apply the 'fivethirtyeight' matplotlib style sheet to figures in this notebook.
plt.style.use('fivethirtyeight')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
# Relative path of the SIG training CSV.
file = './data/sig_train_data.csv'

# The CSV is decoded as latin1.
classifier = pd.read_csv(filepath_or_buffer=file, encoding='latin1')

In [3]:
# Replace the CSV headers with short snake_case names (positional, nine columns).
classifier.columns = [
    'question_num',
    'question',
    'response',
    'maturity',
    'misc_info',
    'aup_ref',
    'iso_num',
    'iso_desc',
    'sig_class',
]

In [4]:
# Preview the first five rows of the training frame.
classifier.head(n=5)

Unnamed: 0,question_num,question,response,maturity,misc_info,aup_ref,iso_num,iso_desc,sig_class
0,A.1,Is there a risk assessment program that has be...,Yes,5.0,An enterprise risk assessment is performed ann...,A.1 IT & Infrastructure Risk Governance,5.1 6.1.2,"Leadership & Commitment, Information Security ...",Risk Assessment
1,A.1.1,"A risk assessment, conducted within the last 1...",Yes,,,A.2 IT & Infrastructure Risk Assessment Life C...,8.2,Information security risk assessment,Risk Assessment
2,A.1.2,Risk Governance?,Yes,,,A.1 IT & Infrastructure Risk Governance,,,Risk Assessment
3,A.1.3,"Range of assets to include: people, processes,...",Yes,,,A.1 IT & Infrastructure Risk Governance,,,Risk Assessment
4,A.1.4,"Range of threats to include: malicious, natura...",Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment


In [5]:
# Number of questions per SIG class (target distribution).
classifier['sig_class'].value_counts()

Application Security                         215
Operations Management                        120
Business Resiliency                          105
Asset and Information Management             104
Server Security                               96
Physical and Environment                      86
End User Device Security                      76
Access Control                                74
Network Security                              66
Incident Event & Communication Management     45
Risk Assessment                               43
Security Policy                               42
Threat Management                             38
Compliance                                    31
Human Resource Security                       28
Privacy                                       18
Organizational Security                       10
Name: sig_class, dtype: int64

In [6]:
# Integer code for every SIG class label (codes follow the class-frequency order
# shown by value_counts(): 1 = most frequent class).
SIG_CLASS_TO_NUM = {
    'Application Security': 1,
    'Operations Management': 2,
    'Business Resiliency': 3,
    'Asset and Information Management': 4,
    'Server Security': 5,
    'Physical and Environment': 6,
    'End User Device Security': 7,
    'Access Control': 8,
    'Network Security': 9,
    'Incident Event & Communication Management': 10,
    'Risk Assessment': 11,
    'Security Policy': 12,
    'Threat Management': 13,
    'Compliance': 14,
    'Human Resource Security': 15,
    'Privacy': 16,
    'Organizational Security': 17,
}

sig_class_num = classifier.loc[:, 'sig_class'].map(SIG_CLASS_TO_NUM)

# Series.map() silently produces NaN for any label missing from the mapping
# (typo, new class, stray whitespace). Fail here with a clear message instead of
# letting NaN targets reach the model.
unmapped_labels = classifier.loc[sig_class_num.isna(), 'sig_class'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        'sig_class labels without a numeric code: {}'.format(list(unmapped_labels))
    )

classifier.loc[:, 'sig_class_num'] = sig_class_num

In [7]:
# Sanity check: the numeric codes should show the same counts as the class labels.
classifier['sig_class_num'].value_counts()

1     215
2     120
3     105
4     104
5      96
6      86
7      76
8      74
9      66
10     45
11     43
12     42
13     38
14     31
15     28
16     18
17     10
Name: sig_class_num, dtype: int64

In [8]:
# Preview the first rows only instead of rendering the whole frame.
classifier.head(10)

Unnamed: 0,question_num,question,response,maturity,misc_info,aup_ref,iso_num,iso_desc,sig_class,sig_class_num
0,A.1,Is there a risk assessment program that has be...,Yes,5.0,An enterprise risk assessment is performed ann...,A.1 IT & Infrastructure Risk Governance,5.1 6.1.2,"Leadership & Commitment, Information Security ...",Risk Assessment,11
1,A.1.1,"A risk assessment, conducted within the last 1...",Yes,,,A.2 IT & Infrastructure Risk Assessment Life C...,8.2,Information security risk assessment,Risk Assessment,11
2,A.1.2,Risk Governance?,Yes,,,A.1 IT & Infrastructure Risk Governance,,,Risk Assessment,11
3,A.1.3,"Range of assets to include: people, processes,...",Yes,,,A.1 IT & Infrastructure Risk Governance,,,Risk Assessment,11
4,A.1.4,"Range of threats to include: malicious, natura...",Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11
5,A.1.5,Risk scoping?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11
6,A.1.6,Risk context?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11
7,A.1.7,Risk training plan?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11
8,A.1.8,Risk evaluation criteria?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11
9,A.1.9,"Risk scenarios? If yes, do they include:",Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11


In [9]:
# Replace NaN values in the optional free-text columns with empty strings.
#
# The result is assigned back to the frame. Calling fillna(..., inplace=True) on a
# `.loc[:, col]` selection operates on an intermediate object; depending on the
# pandas version it may be a copy (always under Copy-on-Write), in which case the
# fill never reaches `classifier`.
text_fill_columns = ['misc_info', 'iso_desc', 'aup_ref']
classifier[text_fill_columns] = classifier[text_fill_columns].fillna('')

In [10]:
# Build the master text used as model input: question, misc_info, iso_desc and
# aup_ref joined left to right with single spaces.
textfield_sources = ['question', 'misc_info', 'iso_desc', 'aup_ref']

master_text = classifier.loc[:, textfield_sources[0]]
for source_column in textfield_sources[1:]:
    master_text = master_text + ' ' + classifier.loc[:, source_column]

classifier.loc[:, 'textfield'] = master_text

In [11]:
# Feature text (X) and numeric class target (y).
X = classifier['textfield']
y = classifier['sig_class_num']

In [12]:
# Hold out a test split; the fixed random_state makes the split reproducible.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)

# EDA on Question Text

In [13]:
# Learn the vocabulary from the training text, then encode that same text as a
# document-term matrix of token counts.
vect = CountVectorizer()
X_train_dtm = vect.fit(X_train).transform(X_train)

In [14]:
# Display the sparse matrix summary (shape, dtype, number of stored elements).
X_train_dtm

<897x2082 sparse matrix of type '<class 'numpy.int64'>'
	with 15481 stored elements in Compressed Sparse Row format>

In [15]:
# (documents, vocabulary size). The sparse matrix exposes .shape directly, so there
# is no need to materialize a dense copy just to read it.
X_train_dtm.shape

(897, 2082)

In [16]:
# Total occurrences of each vocabulary term across all training documents.
# Summing the sparse matrix directly avoids allocating the full dense matrix.
X_train_dtm.sum(axis=0)

matrix([[11,  1, 19, ...,  4,  2,  3]], dtype=int64)

In [17]:
# List the learned vocabulary terms.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2; use
# get_feature_names_out() when the installed version has it, else fall back.
(vect.get_feature_names_out().tolist()
 if hasattr(vect, 'get_feature_names_out')
 else vect.get_feature_names())

['10',
 '1000s',
 '11',
 '12',
 '120',
 '140',
 '15',
 '16',
 '18',
 '180',
 '19',
 '1x',
 '2048',
 '21',
 '24',
 '24x7',
 '24x7x365',
 '25',
 '27001',
 '2731',
 '2fa',
 '30',
 '33',
 '3402',
 '35',
 '365',
 '37',
 '3rd',
 '40',
 '443',
 '60',
 '6671',
 '80',
 '800',
 '802',
 '88',
 '90',
 'ability',
 'able',
 'about',
 'above',
 'abuse',
 'accept',
 'acceptable',
 'acceptance',
 'accepted',
 'accepting',
 'access',
 'accessed',
 'accessible',
 'accessing',
 'accesssed',
 'accidental',
 'accordance',
 'according',
 'account',
 'accounting',
 'accounts',
 'accuracy',
 'acknowledge',
 'acquisition',
 'acted',
 'action',
 'actioned',
 'actions',
 'activate',
 'activating',
 'active',
 'actively',
 'activex',
 'activities',
 'activity',
 'ad',
 'addition',
 'additional',
 'additionally',
 'address',
 'addressed',
 'addresses',
 'addressing',
 'adequacy',
 'adjustment',
 'admin',
 'administration',
 'administrative',
 'administrator',
 'administrators',
 'adopted',
 'advertising',
 'advises

In [18]:
# Mapping of each vocabulary term to its column index in the document-term matrix.
vect.vocabulary_

{'are': 155,
 'apis': 136,
 'tested': 1862,
 'for': 767,
 'security': 1659,
 'weaknesses': 2030,
 'if': 888,
 'yes': 2076,
 'does': 588,
 'this': 1875,
 'include': 911,
 'management': 1113,
 'of': 1229,
 'technical': 1843,
 'vulnerabilities': 2022,
 'is': 996,
 'scoped': 1644,
 'systems': 1825,
 'and': 125,
 'data': 470,
 'ever': 684,
 'used': 1976,
 'in': 902,
 'the': 1867,
 'test': 1861,
 'development': 539,
 'or': 1262,
 'qa': 1472,
 'environments': 660,
 'protection': 1452,
 'software': 1739,
 'program': 1436,
 'bouncycastle': 254,
 'openssl': 1248,
 'prior': 1411,
 'to': 1893,
 'device': 540,
 'on': 1240,
 'boarding': 250,
 'constituents': 396,
 'required': 1565,
 'sign': 1712,
 'legal': 1045,
 'agreement': 98,
 'which': 2040,
 'details': 525,
 'obligations': 1224,
 'rights': 1606,
 'related': 1527,
 'mobile': 1159,
 'devices': 541,
 'it': 1006,
 'policy': 1378,
 'errors': 666,
 'resulting': 1588,
 'from': 781,
 'incomplete': 915,
 'inaccurate': 903,
 'business': 267,
 'reporting'

# Train Test Classifier using Naive Bayes

In [19]:
# Build document-term matrices: the vocabulary is learned from the training split
# only, and the test split is encoded with that same vocabulary.
vect = CountVectorizer()
X_train_dtm = vect.fit(X_train).transform(X_train)
X_test_dtm = vect.transform(X_test)

# Fit Multinomial Naive Bayes on the training counts and predict sig_class_num
# for the test documents.
nb = MultinomialNB()
y_pred_class = nb.fit(X_train_dtm, y_train).predict(X_test_dtm)

In [20]:
# Share of test documents whose predicted class equals the true class.
metrics.accuracy_score(y_true=y_test, y_pred=y_pred_class)

0.62333333333333329

In [21]:
# Number of test-split documents per numeric class code.
y_test.value_counts()

1     53
3     36
2     26
4     26
5     24
6     22
7     19
9     18
8     15
12    11
15    10
11     9
14     9
10     8
13     7
17     4
16     3
Name: sig_class_num, dtype: int64

In [22]:
# Helper for comparing vectorizer configurations on the same train/test split.

def tokenize_test(vect):
    """Fit `vect` on the training text and report Naive Bayes test accuracy.

    Reads the notebook-level X_train, X_test, y_train and y_test. The vectorizer
    is fitted in place on X_train, a fresh MultinomialNB is trained on the
    resulting counts, and two lines are printed: the vocabulary size and the
    accuracy on the test split.

    Parameters
    ----------
    vect : vectorizer object providing fit() and transform()
        Configuration to evaluate; it is fitted as a side effect.

    Returns
    -------
    None
    """
    train_matrix = vect.fit(X_train).transform(X_train)
    print('Number of features: ', train_matrix.shape[1])
    test_matrix = vect.transform(X_test)

    model = MultinomialNB().fit(train_matrix, y_train)
    predictions = model.predict(test_matrix)
    print('Accuracy: ', metrics.accuracy_score(y_test, predictions))

In [23]:
# Baseline: default CountVectorizer settings.
vect = CountVectorizer()
tokenize_test(vect=vect)

Number of features:  2082
Accuracy:  0.623333333333


In [24]:
# Unigrams plus bigrams.
vect = CountVectorizer(ngram_range=(1, 2))
tokenize_test(vect=vect)

Number of features:  10439
Accuracy:  0.626666666667


In [25]:
# Vectorizer configured with scikit-learn's built-in English stop-word list.
vect = CountVectorizer(stop_words='english')

# Show the stop words this configuration resolves to.
vect.get_stop_words()

frozenset({'a',
           'about',
           'above',
           'across',
           'after',
           'afterwards',
           'again',
           'against',
           'all',
           'almost',
           'alone',
           'along',
           'already',
           'also',
           'although',
           'always',
           'am',
           'among',
           'amongst',
           'amoungst',
           'amount',
           'an',
           'and',
           'another',
           'any',
           'anyhow',
           'anyone',
           'anything',
           'anyway',
           'anywhere',
           'are',
           'around',
           'as',
           'at',
           'back',
           'be',
           'became',
           'because',
           'become',
           'becomes',
           'becoming',
           'been',
           'before',
           'beforehand',
           'behind',
           'being',
           'below',
           'beside',
           'besides'

In [26]:
# Evaluate the stop-word vectorizer created in the previous cell.
tokenize_test(vect=vect)

Number of features:  1932
Accuracy:  0.63


In [27]:
# Same stop-word configuration, rebuilt so `vect` is freshly fitted for inspection below.
vect = CountVectorizer(stop_words='english')
tokenize_test(vect=vect)

Number of features:  1932
Accuracy:  0.63


In [28]:
# List the vocabulary that remains after stop-word removal.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2; use
# get_feature_names_out() when the installed version has it, else fall back.
(vect.get_feature_names_out().tolist()
 if hasattr(vect, 'get_feature_names_out')
 else vect.get_feature_names())

['10',
 '1000s',
 '11',
 '12',
 '120',
 '140',
 '15',
 '16',
 '18',
 '180',
 '19',
 '1x',
 '2048',
 '21',
 '24',
 '24x7',
 '24x7x365',
 '25',
 '27001',
 '2731',
 '2fa',
 '30',
 '33',
 '3402',
 '35',
 '365',
 '37',
 '3rd',
 '40',
 '443',
 '60',
 '6671',
 '80',
 '800',
 '802',
 '88',
 '90',
 'ability',
 'able',
 'abuse',
 'accept',
 'acceptable',
 'acceptance',
 'accepted',
 'accepting',
 'access',
 'accessed',
 'accessible',
 'accessing',
 'accesssed',
 'accidental',
 'accordance',
 'according',
 'account',
 'accounting',
 'accounts',
 'accuracy',
 'acknowledge',
 'acquisition',
 'acted',
 'action',
 'actioned',
 'actions',
 'activate',
 'activating',
 'active',
 'actively',
 'activex',
 'activities',
 'activity',
 'ad',
 'addition',
 'additional',
 'additionally',
 'address',
 'addressed',
 'addresses',
 'addressing',
 'adequacy',
 'adjustment',
 'admin',
 'administration',
 'administrative',
 'administrator',
 'administrators',
 'adopted',
 'advertising',
 'advises',
 'affect',
 'affe

In [29]:
# Stop-word removal combined with unigrams + bigrams; min_df=1 keeps every term.
vect = CountVectorizer(
    ngram_range=(1, 2),
    stop_words='english',
    min_df=1,
)
tokenize_test(vect=vect)

Number of features:  8258
Accuracy:  0.656666666667


# TextBlob

In [30]:
# Wrap the question text of the row labelled 0 in a TextBlob for token-level experiments.
question = TextBlob(classifier.at[0, 'question'])

In [31]:
# Display the sample question blob.
question

TextBlob("Is there a risk assessment program that has been approved by management, communicated to constituents and an owner to maintain and review the program? if yes, does it include:")

In [32]:
# Snowball stemmer for English.
stemmer = SnowballStemmer(language='english')

In [33]:
# Tokens of the sample question as produced by TextBlob.
question.words

WordList(['Is', 'there', 'a', 'risk', 'assessment', 'program', 'that', 'has', 'been', 'approved', 'by', 'management', 'communicated', 'to', 'constituents', 'and', 'an', 'owner', 'to', 'maintain', 'and', 'review', 'the', 'program', 'if', 'yes', 'does', 'it', 'include'])

In [34]:
# Stemming is the crude option: it chops suffixes and can yield non-words.
list(map(stemmer.stem, question.words))

['is',
 'there',
 'a',
 'risk',
 'assess',
 'program',
 'that',
 'has',
 'been',
 'approv',
 'by',
 'manag',
 'communic',
 'to',
 'constitu',
 'and',
 'an',
 'owner',
 'to',
 'maintain',
 'and',
 'review',
 'the',
 'program',
 'if',
 'yes',
 'doe',
 'it',
 'includ']

In [35]:
# Lemmatize every token as a verb (pos='v') and print the resulting list.
verb_lemmas = [token.lemmatize(pos='v') for token in question.words]
print(verb_lemmas)

['Is', 'there', 'a', 'risk', 'assessment', 'program', 'that', 'have', 'be', 'approve', 'by', 'management', 'communicate', 'to', 'constituents', 'and', 'an', 'owner', 'to', 'maintain', 'and', 'review', 'the', 'program', 'if', 'yes', 'do', 'it', 'include']


In [36]:
def split_into_lemmas(text):
    """Analyzer callable: lower-case `text` and yield its verb lemmas.

    `text` is coerced with str(), lower-cased, tokenized by TextBlob, and each
    token is lemmatized as a verb (pos='v').

    Returns a generator of lemmas, one per token, in document order.
    """
    lowered = str(text).lower()
    blob = TextBlob(lowered)
    return (token.lemmatize(pos='v') for token in blob.words)

In [37]:
# Lemma-based vocabulary, keeping only terms that occur in at least 3 training documents.
#
# stop_words and ngram_range are deliberately not passed: scikit-learn ignores both
# when `analyzer` is a callable, so the earlier values had no effect on the result.
# To really drop stop words, filter them inside split_into_lemmas.
vect = CountVectorizer(analyzer=split_into_lemmas, decode_error='replace', min_df=3)
tokenize_test(vect)

Number of features:  851
Accuracy:  0.666666666667


In [38]:
# Preview the first rows (now including textfield) instead of rendering the whole frame.
classifier.head(10)

Unnamed: 0,question_num,question,response,maturity,misc_info,aup_ref,iso_num,iso_desc,sig_class,sig_class_num,textfield
0,A.1,Is there a risk assessment program that has be...,Yes,5.0,An enterprise risk assessment is performed ann...,A.1 IT & Infrastructure Risk Governance,5.1 6.1.2,"Leadership & Commitment, Information Security ...",Risk Assessment,11,Is there a risk assessment program that has be...
1,A.1.1,"A risk assessment, conducted within the last 1...",Yes,,,A.2 IT & Infrastructure Risk Assessment Life C...,8.2,Information security risk assessment,Risk Assessment,11,"A risk assessment, conducted within the last 1..."
2,A.1.2,Risk Governance?,Yes,,,A.1 IT & Infrastructure Risk Governance,,,Risk Assessment,11,Risk Governance? A.1 IT & Infrastructure Ris...
3,A.1.3,"Range of assets to include: people, processes,...",Yes,,,A.1 IT & Infrastructure Risk Governance,,,Risk Assessment,11,"Range of assets to include: people, processes,..."
4,A.1.4,"Range of threats to include: malicious, natura...",Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11,"Range of threats to include: malicious, natura..."
5,A.1.5,Risk scoping?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11,Risk scoping? Information Security Risk Asses...
6,A.1.6,Risk context?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11,Risk context? Information Security Risk Asses...
7,A.1.7,Risk training plan?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11,Risk training plan? Information Security Risk...
8,A.1.8,Risk evaluation criteria?,Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11,Risk evaluation criteria? Information Securit...
9,A.1.9,"Risk scenarios? If yes, do they include:",Yes,,,A.1 IT & Infrastructure Risk Governance,6.1.2,Information Security Risk Assessment,Risk Assessment,11,"Risk scenarios? If yes, do they include: Info..."


In [39]:
#help(LogisticRegression())

# Deploy model on TRC_Master Data

In [40]:
# Relative path of the TRC master CSV that the trained model is applied to.
trc_file = './data/trc_master_train_data.csv'

In [41]:
# Load the TRC master data, decoded as latin1.
trc_test_data = pd.read_csv(filepath_or_buffer=trc_file, encoding='latin1')

In [42]:
# Preview the first rows only instead of rendering the whole frame.
trc_test_data.head(10)

Unnamed: 0,Tag,Question,Answer,Updated date,Answer provided by,Expiration Date
0,access control,Are all system login screens configured to dis...,"This is in place for some, but not all systems",,,
1,access control,Does your company disable default privileged a...,Privileged accounts granted based on role.,,,
2,access control,,"Only limited, named systems administrators hav...",,,
3,access control policy,copy of your Access Control Policy and process,Access is granted based on employee role. Fac...,,,
4,"Access Control,",Please explain who initiates and approves the ...,Access to systems and resources is role-based....,,,
5,"access control, accounts,",Is there a dedicated team tasked with create/a...,This is handled by the FactSet Systems Enginee...,,,
6,"access control, authorization, system admin",Who authorizes access,FactSet server administrators grant access con...,,,
7,"access control, employee management system, le...",How are your internal users (employees) grante...,Employees are issued user IDs specific to the ...,,,
8,"access control, password policy",Does your company enforce a user account creat...,Account IDs are only reused if an employee or ...,,,
9,"access control, password policy, encryption","Does your company require all passwords, inclu...","This is true for all network, application and ...",,,


In [43]:
# Fill in NaN data with blanks.
#
# The result is assigned back to the frame. Calling fillna(..., inplace=True) on a
# `.loc[:, col]` selection operates on an intermediate object; depending on the
# pandas version it may be a copy (always under Copy-on-Write), in which case the
# fill never reaches `trc_test_data` and the concatenated text would contain NaN.
trc_text_columns = ['Tag', 'Question', 'Answer']
trc_test_data[trc_text_columns] = trc_test_data[trc_text_columns].fillna('')

In [44]:
# Join Tag, Question and Answer (in that order) with single spaces to form the
# text that is fed to the vectorizer.
combined_text = trc_test_data.loc[:, 'Tag']
for trc_column in ('Question', 'Answer'):
    combined_text = combined_text + ' ' + trc_test_data.loc[:, trc_column]

trc_test_data.loc[:, 'text_concat'] = combined_text

In [45]:
# Preview the first rows (now including text_concat) instead of rendering the whole frame.
trc_test_data.head(10)

Unnamed: 0,Tag,Question,Answer,Updated date,Answer provided by,Expiration Date,text_concat
0,access control,Are all system login screens configured to dis...,"This is in place for some, but not all systems",,,,access control Are all system login screens co...
1,access control,Does your company disable default privileged a...,Privileged accounts granted based on role.,,,,access control Does your company disable defau...
2,access control,,"Only limited, named systems administrators hav...",,,,"access control Only limited, named systems ad..."
3,access control policy,copy of your Access Control Policy and process,Access is granted based on employee role. Fac...,,,,access control policy copy of your Access Cont...
4,"Access Control,",Please explain who initiates and approves the ...,Access to systems and resources is role-based....,,,,"Access Control, Please explain who initiates ..."
5,"access control, accounts,",Is there a dedicated team tasked with create/a...,This is handled by the FactSet Systems Enginee...,,,,"access control, accounts, Is there a dedicated..."
6,"access control, authorization, system admin",Who authorizes access,FactSet server administrators grant access con...,,,,"access control, authorization, system admin Wh..."
7,"access control, employee management system, le...",How are your internal users (employees) grante...,Employees are issued user IDs specific to the ...,,,,"access control, employee management system, le..."
8,"access control, password policy",Does your company enforce a user account creat...,Account IDs are only reused if an employee or ...,,,,"access control, password policy Does your comp..."
9,"access control, password policy, encryption","Does your company require all passwords, inclu...","This is true for all network, application and ...",,,,"access control, password policy, encryption Do..."


In [46]:
# Text to classify: one concatenated document per TRC row.
X_master_test = trc_test_data['text_concat']

In [47]:


# Create document-term matrices with CountVectorizer().
# The vocabulary is learned from the SIG training split only; the TRC text is
# encoded with that same vocabulary.
#
# stop_words and ngram_range are deliberately not passed: scikit-learn ignores both
# when `analyzer` is a callable, so the earlier values had no effect on the result.
vect = CountVectorizer(analyzer=split_into_lemmas, decode_error='replace', min_df=3)
vect.fit(X_train)
X_train_dtm = vect.transform(X_train)
X_master_test_dtm = vect.transform(X_master_test)

# Use Naive Bayes to predict sig_class_num for every TRC document.
nb = MultinomialNB()
nb.fit(X_train_dtm, y_train)
y_pred_class = nb.predict(X_master_test_dtm)

In [48]:
# Convert y_pred_class results into a dataframe.
# Predictions are positional (one per row of trc_test_data). Reusing that frame's
# index guarantees a later column-wise concat pairs each prediction with its own
# row even if the index is not the default 0..n-1.
class_pred = pd.DataFrame(y_pred_class, index=trc_test_data.index)

In [49]:
# Attach the predictions as an extra column next to the TRC data (aligned on index).
frames_side_by_side = [trc_test_data, class_pred]
trc_with_class_pred = pd.concat(frames_side_by_side, axis=1)

In [50]:
# Preview the first rows only instead of rendering the whole frame.
trc_with_class_pred.head(10)

Unnamed: 0,Tag,Question,Answer,Updated date,Answer provided by,Expiration Date,text_concat,0
0,access control,Are all system login screens configured to dis...,"This is in place for some, but not all systems",,,,access control Are all system login screens co...,8
1,access control,Does your company disable default privileged a...,Privileged accounts granted based on role.,,,,access control Does your company disable defau...,8
2,access control,,"Only limited, named systems administrators hav...",,,,"access control Only limited, named systems ad...",8
3,access control policy,copy of your Access Control Policy and process,Access is granted based on employee role. Fac...,,,,access control policy copy of your Access Cont...,8
4,"Access Control,",Please explain who initiates and approves the ...,Access to systems and resources is role-based....,,,,"Access Control, Please explain who initiates ...",8
5,"access control, accounts,",Is there a dedicated team tasked with create/a...,This is handled by the FactSet Systems Enginee...,,,,"access control, accounts, Is there a dedicated...",8
6,"access control, authorization, system admin",Who authorizes access,FactSet server administrators grant access con...,,,,"access control, authorization, system admin Wh...",8
7,"access control, employee management system, le...",How are your internal users (employees) grante...,Employees are issued user IDs specific to the ...,,,,"access control, employee management system, le...",8
8,"access control, password policy",Does your company enforce a user account creat...,Account IDs are only reused if an employee or ...,,,,"access control, password policy Does your comp...",8
9,"access control, password policy, encryption","Does your company require all passwords, inclu...","This is true for all network, application and ...",,,,"access control, password policy, encryption Do...",8


In [51]:
# Name the prediction column (labelled 0 after the concat) 'pred_class_num'.
# Only that column is renamed: re-typing the full header list would break, or
# silently mislabel data, as soon as the input CSV gains, loses or reorders a column.
trc_with_class_pred = trc_with_class_pred.rename(columns={0: 'pred_class_num'})

In [52]:
# Translate predicted numeric codes back to SIG class names.
# Position in this list (starting at 1) is the class code.
class_names_in_code_order = [
    'Application Security',
    'Operations Management',
    'Business Resiliency',
    'Asset and Information Management',
    'Server Security',
    'Physical and Environment',
    'End User Device Security',
    'Access Control',
    'Network Security',
    'Incident Event & Communication Management',
    'Risk Assessment',
    'Security Policy',
    'Threat Management',
    'Compliance',
    'Human Resource Security',
    'Privacy',
    'Organizational Security',
]
code_to_class_name = dict(enumerate(class_names_in_code_order, start=1))

predicted_codes = trc_with_class_pred.loc[:, 'pred_class_num']
trc_with_class_pred.loc[:, 'pred_class'] = predicted_codes.map(code_to_class_name)

In [53]:
# Preview the first rows (with predicted class names) instead of rendering the whole frame.
trc_with_class_pred.head(10)

Unnamed: 0,Tag,Question,Answer,Updated date,Answer provided by,Expiration Date,text_concat,pred_class_num,pred_class
0,access control,Are all system login screens configured to dis...,"This is in place for some, but not all systems",,,,access control Are all system login screens co...,8,Access Control
1,access control,Does your company disable default privileged a...,Privileged accounts granted based on role.,,,,access control Does your company disable defau...,8,Access Control
2,access control,,"Only limited, named systems administrators hav...",,,,"access control Only limited, named systems ad...",8,Access Control
3,access control policy,copy of your Access Control Policy and process,Access is granted based on employee role. Fac...,,,,access control policy copy of your Access Cont...,8,Access Control
4,"Access Control,",Please explain who initiates and approves the ...,Access to systems and resources is role-based....,,,,"Access Control, Please explain who initiates ...",8,Access Control
5,"access control, accounts,",Is there a dedicated team tasked with create/a...,This is handled by the FactSet Systems Enginee...,,,,"access control, accounts, Is there a dedicated...",8,Access Control
6,"access control, authorization, system admin",Who authorizes access,FactSet server administrators grant access con...,,,,"access control, authorization, system admin Wh...",8,Access Control
7,"access control, employee management system, le...",How are your internal users (employees) grante...,Employees are issued user IDs specific to the ...,,,,"access control, employee management system, le...",8,Access Control
8,"access control, password policy",Does your company enforce a user account creat...,Account IDs are only reused if an employee or ...,,,,"access control, password policy Does your comp...",8,Access Control
9,"access control, password policy, encryption","Does your company require all passwords, inclu...","This is true for all network, application and ...",,,,"access control, password policy, encryption Do...",8,Access Control


In [55]:
# Persist the TRC rows together with their predicted class (the index is written too).
trc_with_class_pred.to_csv(path_or_buf='./data/trc_pred_class.csv')