In [8]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from xgboost import XGBClassifier
#from lightgbm import LGBMClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, classification_report
from csv import writer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn import tree
import matplotlib.pyplot as plt
import time

# Mapping Function (Encoding of attributes)

In [9]:
#grp_gap ==> maximum number of allowed attribute values under each group. 
#It is initialized as 20. However, we can provide any value here

# grp is a dict of attribute groups used during mapping/encoding

#map_type == 1 >>> normal encoding
#map_type == 2 >>> encoding based on atrribute group



def GetAttributeMapping(data,grp=None,grp_gap=20,map_type=1):
    mapping = {}
    mapping['NotA'] = -1
    mapping[0] = 0
    mapping['YES'] = 1
    mapping['NO'] = 0
    if(map_type==1):
        for col in data.columns[:4]:
            #print(col,it_full[col].unique())
            col_un = data[col].unique()
            cnt = 1
            for val in col_un:
                if(val != 'NotA'):
                    mapping[val] = cnt
                    cnt = cnt + 1
        return mapping
    elif(map_type==2):
        for col in data.columns[1:3]:
            #print(col,it_full[col].unique())
            col_un = data[col].unique()
            cnt = 1
            for val in col_un:
                if(val != 'NotA'):
                    mapping[val] = cnt
                    cnt = cnt + 1
        
        for g in grp:
            grp_num = 1
            for member in grp[g]:
                mem_num = 1
                for val in member:
                    mapping[val] = grp_num*grp_gap+mem_num
                    mem_num = mem_num + 1
                grp_num = grp_num + 1
        return mapping
        
    

# Create groups of attributes

In [10]:
designation_grp = [['CEO'],
                   ['CTO'],
                   ['FINANCE MANAGER'],
                   ['HR MANAGER'],
                   ['DESIGNER','PROGRAMMER','SDE','TESTER'],
                   ['PROJECT_MANAGER','SYSTEM_ARCHITECT'],
                   ['PROJECT_LEADER','PRINCIPAL'],
                   ['IT_MANAGER','SECURTY_ENGINEER'],
                   ['NETWORK_ENGINEER'],
                   ['DATABASE_ENGINEER']
                  ]

resource_grp = [['EMP_DETAIL'],
                ['CLIENT_DETAIL'],
                ['SALARY_DETAIL','PF_DETAIL'],
                ['PROJECT_DETAIL','PROJECT_PLAN'],
                ['NETWORK_SETUP'],
                ['DATABASE'],
                ['PROJECT_COST','ALLOCATED_FUND','FINANCE_REPORT','TAX_DETAIL'],
                ['SERVER','STORAGE','GPU']
               ]

attr_grp={}
attr_grp['DESIGNATION'] = designation_grp
attr_grp['Resource'] = resource_grp


# Find Relation between common subject and object attributes 

In [11]:
def same_conditions(col1,col2):
    if ((col1 == -1) or (col2 == -1)):
        return 2
    elif (col1==col2):
        return 1
    else:
        return 0
def chk_nota(col):
    if(col==-1):
        return 1
    else:
        return 0

# Prepare the data for Training and Testing based on relation

In [12]:
def GetPreparedData(train_data,test_data,prep_type=4):
    data = pd.concat([train_data,test_data],axis=0)
    #print(data.columns)
    #exit(0)
    if(prep_type==1):#Naive
        map_type = 1
        mapping = GetAttributeMapping(data,grp=attr_grp,map_type=map_type)
        print(mapping)
        data_encoded = data.replace(mapping)
    elif(prep_type==2):#Columns for same attribute values in subject and object
        map_type = 1
        mapping = GetAttributeMapping(data,grp=attr_grp,map_type=map_type)
        data_encoded = data.replace(mapping)
        data_encoded['sameProj'] = data_encoded.apply(lambda x: same_conditions(x['Project_name'], x['Project_Name']), axis=1)
        data_encoded['sameDep'] = data_encoded.apply(lambda x: same_conditions(x['Department'], x['Department.1']), axis=1)
        data_encoded = data_encoded.drop('Department',axis=1)
        data_encoded = data_encoded.drop('Department.1',axis=1)
        data_encoded = data_encoded.drop('Project_name',axis=1)
        data_encoded = data_encoded.drop('Project_Name',axis=1)
    elif(prep_type==3):#Grouping of attributes
        map_type = 2
        mapping = GetAttributeMapping(data,grp=attr_grp,map_type=map_type)
        data_encoded = data.replace(mapping)
    elif(prep_type==4):#Grouping of attributes + Columns for same attribute values in subject and object
        map_type = 2
        mapping = GetAttributeMapping(data,grp=attr_grp,map_type=map_type)
        data_encoded = data.replace(mapping)
        data_encoded['sameProj'] = data_encoded.apply(lambda x: same_conditions(x['Project_name'], x['Project_Name']), axis=1)
        data_encoded['sameDep'] = data_encoded.apply(lambda x: same_conditions(x['Department'], x['Department.1']), axis=1)
        data_encoded = data_encoded.drop('Department',axis=1)
        data_encoded = data_encoded.drop('Department.1',axis=1)
        data_encoded = data_encoded.drop('Project_name',axis=1)
        data_encoded = data_encoded.drop('Project_Name',axis=1)
    elif(prep_type==5):#Naive+NACol
        map_type = 1
        mapping = GetAttributeMapping(data,grp=attr_grp,map_type=map_type)
        data_encoded = data.replace(mapping)
        data_encoded['Proj_NA'] = data_encoded.apply(lambda x: chk_nota(x['Project_name']), axis=1)
        
        
    
    #X = data_encoded.iloc[:,:-1]
    #y = data_encoded.iloc[:,-1]
    
    X = data_encoded.loc[:, data_encoded.columns != 'Access']
    y = data_encoded.loc[:, data_encoded.columns == 'Access']
    
    X_train = X.head(train_data.shape[0])
    X_test = X.tail(test_data.shape[0])
    y_train = y.head(train_data.shape[0])
    y_test = y.tail(test_data.shape[0])
    #print(X)
    return X_train, X_test, y_train, y_test
        

In [13]:

models_dict = {
    #"SupportVectorMachine": SVC(kernel="linear"),
    #"LogisticRegression": LogisticRegression(solver="saga", n_jobs=-1),
    "ArtificalNeuralNetwork": MLPClassifier(hidden_layer_sizes=30, max_iter=1500),
    "DL": MLPClassifier(hidden_layer_sizes=30, max_iter=1500),
    "DecisionTree": DecisionTreeClassifier(),
    #"ExtraTree": ExtraTreeClassifier(),
    "RandomForest": RandomForestClassifier(n_jobs=-1),
    "ExtraTrees": ExtraTreesClassifier(n_jobs=-1),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="error", n_jobs=-1),
    #"LightGBM": LGBMClassifier(n_estimators=100, n_jobs=-1),
    #"AdaBoost": AdaBoostClassifier(n_estimators=100, learning_rate=1.0),
    "GradientBoosting": GradientBoostingClassifier(n_estimators=100, learning_rate=1.0),
}

# Reading Training and Test Data

In [15]:


#DESIGNATION	Project_name	Department	Resource	Project_Name	Department
fn=['S-Designation', 'S-Project', 'S-Dept', 'O-Res','O-Project', 'O-Dept']
cn=['AccessDenied', 'AccessGranted']

for (name, clf) in models_dict.items():
        train_data = pd.read_csv('Company/it_train_new1.csv')
        test_data = pd.read_csv('change_Company.csv')
        test1_data = pd.read_csv('change2_Company.csv')
        X_train, X_test, y_train, y_test = GetPreparedData(train_data,test_data,prep_type=3)
        X_train1, X_test1, y_train1, y_test1 = GetPreparedData(train_data,test1_data,prep_type=3)
        clf.fit(X_train.to_numpy(), y_train.to_numpy())
        clf.fit(X_train1.to_numpy(), y_train1.to_numpy())
        pred = clf.predict(X_test.to_numpy())
        pred1 = clf.predict(X_test1.to_numpy())
        
        print(confusion_matrix(y_test, pred))
        print(classification_report(y_test, pred))
        
        print(confusion_matrix(y_test1, pred1))
        print(classification_report(y_test1, pred1))
        
        #write these to the file
        
        with open("result_it_avc.txt", 'a') as f:
            f.write(name + "\n")
            f.write("Test Data\n")
            f.write(str(confusion_matrix(y_test, pred))) # Convert ndarray to string
            f.write(str(classification_report(y_test, pred))) # Convert ndarray to string
            f.write("Test1 Data\n")
            f.write(str(confusion_matrix(y_test1, pred1))) # Convert ndarray to string
            f.write(str(classification_report(y_test1, pred1))) # Convert ndarray to string
            f.write("\n\n")
        
        

  data_encoded = data.replace(mapping)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[[137  47]
 [ 27  36]]
              precision    recall  f1-score   support

           0       0.84      0.74      0.79       184
           1       0.43      0.57      0.49        63

    accuracy                           0.70       247
   macro avg       0.63      0.66      0.64       247
weighted avg       0.73      0.70      0.71       247

[[ 8  0]
 [18 14]]
              precision    recall  f1-score   support

           0       0.31      1.00      0.47         8
           1       1.00      0.44      0.61        32

    accuracy                           0.55        40
   macro avg       0.65      0.72      0.54        40
weighted avg       0.86      0.55      0.58        40



  data_encoded = data.replace(mapping)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[[157  27]
 [ 42  21]]
              precision    recall  f1-score   support

           0       0.79      0.85      0.82       184
           1       0.44      0.33      0.38        63

    accuracy                           0.72       247
   macro avg       0.61      0.59      0.60       247
weighted avg       0.70      0.72      0.71       247

[[ 8  0]
 [27  5]]
              precision    recall  f1-score   support

           0       0.23      1.00      0.37         8
           1       1.00      0.16      0.27        32

    accuracy                           0.33        40
   macro avg       0.61      0.58      0.32        40
weighted avg       0.85      0.33      0.29        40

[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted 

  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  data_encoded = data.replace(mapping)
  return fit_method(estimator, *args, **kwargs)


[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted avg       0.99      0.99      0.99       247

[[ 8  0]
 [ 2 30]]
              precision    recall  f1-score   support

           0       0.80      1.00      0.89         8
           1       1.00      0.94      0.97        32

    accuracy                           0.95        40
   macro avg       0.90      0.97      0.93        40
weighted avg       0.96      0.95      0.95        40



  return fit_method(estimator, *args, **kwargs)
  data_encoded = data.replace(mapping)


[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted avg       0.99      0.99      0.99       247

[[ 8  0]
 [ 0 32]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      1.00      1.00        32

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted 

  data_encoded = data.replace(mapping)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[[178   6]
 [ 21  42]]
              precision    recall  f1-score   support

           0       0.89      0.97      0.93       184
           1       0.88      0.67      0.76        63

    accuracy                           0.89       247
   macro avg       0.88      0.82      0.84       247
weighted avg       0.89      0.89      0.89       247

[[ 8  0]
 [14 18]]
              precision    recall  f1-score   support

           0       0.36      1.00      0.53         8
           1       1.00      0.56      0.72        32

    accuracy                           0.65        40
   macro avg       0.68      0.78      0.63        40
weighted avg       0.87      0.65      0.68        40



In [17]:


for (name, clf) in models_dict.items():
        train_data = pd.read_csv('Company/it_train_new1.csv')
        test_data = pd.read_csv('change_Company.csv')
        test1_data = pd.read_csv('change2_Company.csv')
        X_train, X_test, y_train, y_test = GetPreparedData(train_data,test_data,prep_type=4)
        X_train1, X_test1, y_train1, y_test1 = GetPreparedData(train_data,test1_data,prep_type=4)
        clf.fit(X_train.to_numpy(), y_train.to_numpy())
        clf.fit(X_train1.to_numpy(), y_train1.to_numpy())
        pred = clf.predict(X_test.to_numpy())
        pred1 = clf.predict(X_test1.to_numpy())
        
        print(confusion_matrix(y_test, pred))
        print(classification_report(y_test, pred))
        
        print(confusion_matrix(y_test1, pred1))
        print(classification_report(y_test1, pred1))
        
        #write these to the file
        
        with open("result_it_avc_arfe.txt", 'a') as f:
            f.write(name + "\n")
            f.write("Test Data\n")
            f.write(str(confusion_matrix(y_test, pred))) # Convert ndarray to string
            f.write(str(classification_report(y_test, pred))) # Convert ndarray to string
            f.write("Test1 Data\n")
            f.write(str(confusion_matrix(y_test1, pred1))) # Convert ndarray to string
            f.write(str(classification_report(y_test1, pred1))) # Convert ndarray to string
            f.write("\n\n")
        
        

  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[[155  29]
 [ 27  36]]
              precision    recall  f1-score   support

           0       0.85      0.84      0.85       184
           1       0.55      0.57      0.56        63

    accuracy                           0.77       247
   macro avg       0.70      0.71      0.70       247
weighted avg       0.78      0.77      0.77       247

[[ 8  0]
 [22 10]]
              precision    recall  f1-score   support

           0       0.27      1.00      0.42         8
           1       1.00      0.31      0.48        32

    accuracy                           0.45        40
   macro avg       0.63      0.66      0.45        40
weighted avg       0.85      0.45      0.47        40



  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[[135  49]
 [ 17  46]]
              precision    recall  f1-score   support

           0       0.89      0.73      0.80       184
           1       0.48      0.73      0.58        63

    accuracy                           0.73       247
   macro avg       0.69      0.73      0.69       247
weighted avg       0.79      0.73      0.75       247

[[ 8  0]
 [14 18]]
              precision    recall  f1-score   support

           0       0.36      1.00      0.53         8
           1       1.00      0.56      0.72        32

    accuracy                           0.65        40
   macro avg       0.68      0.78      0.63        40
weighted avg       0.87      0.65      0.68        40

[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted 

  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted avg       0.99      0.99      0.99       247

[[ 8  0]
 [12 20]]
              precision    recall  f1-score   support

           0       0.40      1.00      0.57         8
           1       1.00      0.62      0.77        32

    accuracy                           0.70        40
   macro avg       0.70      0.81      0.67        40
weighted avg       0.88      0.70      0.73        40



  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted avg       0.99      0.99      0.99       247

[[ 8  0]
 [12 20]]
              precision    recall  f1-score   support

           0       0.40      1.00      0.57         8
           1       1.00      0.62      0.77        32

    accuracy                           0.70        40
   macro avg       0.70      0.81      0.67        40
weighted avg       0.88      0.70      0.73        40

[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted 

  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  data_encoded = data.replace(mapping)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[[184   0]
 [  3  60]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       184
           1       1.00      0.95      0.98        63

    accuracy                           0.99       247
   macro avg       0.99      0.98      0.98       247
weighted avg       0.99      0.99      0.99       247

[[ 8  0]
 [ 5 27]]
              precision    recall  f1-score   support

           0       0.62      1.00      0.76         8
           1       1.00      0.84      0.92        32

    accuracy                           0.88        40
   macro avg       0.81      0.92      0.84        40
weighted avg       0.92      0.88      0.88        40

