In [19]:
import requests
import json
import pandas as pd
# Numeric weight assigned to each impact rating string.
priority_mapping = dict(HIGH=10, MEDIUM=5, LOW=1, NONE=0)

# Short attack-type label -> CWE identifier used as the `cweId` filter when querying for CVEs.
# NOTE(review): CWE-94 is "Code Injection"; OS command injection is usually
# CWE-78 (or CWE-77) — confirm that CWE-94 is the intended id for "cmdi".
attack_type_map = {
    "sqli": "CWE-89",
    "xss": "CWE-79",
    "cmdi": "CWE-94",
}


In [20]:
# CWE identifiers for the three attack classes, bound to short names and
# collected (in sqli, xss, cmdi order) into `vuln_types`.
sqli, xss, cmdi = 'CWE-89', 'CWE-79', 'CWE-94'
vuln_types = [sqli, xss, cmdi]

In [21]:
# Parallel lists: entry i of every list describes the same CVE description row.
desc_data = []
conf_impact = []
int_impact = []
avail_impact = []
# NOTE(review): this is the legacy NVD 1.0 endpoint; NVD has announced its
# retirement in favour of the 2.0 API — confirm it is still reachable.
api_url = 'https://services.nvd.nist.gov/rest/json/cves/1.0/?pubStartDate=2022-01-01T00:00:00:000%20UTC-05:00&pubEndDate=2022-03-01T23:59:59:999%20UTC-05:00'


def _iter_impact_rows(cve_items):
    """Yield one (description, confidentiality, integrity, availability) tuple
    per description entry of each CVE item.

    A CVE item is skipped entirely when it has no CVSS v3 block or when any of
    the three impact fields is missing, so that every yielded description has
    a complete set of labels and the output lists can never drift out of
    alignment.
    """
    for cve_item in cve_items:
        cvss = cve_item.get('impact', {}).get('baseMetricV3', {}).get('cvssV3', {})
        impacts = (
            cvss.get('confidentialityImpact'),
            cvss.get('integrityImpact'),
            cvss.get('availabilityImpact'),
        )
        if None in impacts:
            # Incomplete labels: indexing these keys directly would raise
            # KeyError or leave the four lists with different lengths.
            continue
        description = cve_item.get('cve', {}).get('description', {})
        for description_data in description.get('description_data', []):
            yield (description_data['value'], *impacts)


for key, value in attack_type_map.items():
    # Progress marker: one line is printed per CWE id queried.
    print("1")
    # Only the first page (up to 500 results) is requested for each CWE id.
    params = {"cweId": value, "resultsPerPage": 500}
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(api_url, params=params, timeout=60)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    response.raise_for_status()
    data = response.json()
    for desc, conf, integ, avail in _iter_impact_rows(data['result']['CVE_Items']):
        desc_data.append(desc)
        conf_impact.append(conf)
        int_impact.append(integ)
        avail_impact.append(avail)


1
1
1


In [22]:
import numpy as np

# Rebind the four collected lists as NumPy arrays, then print each array's
# shape (description, confidentiality, integrity, availability order).
desc_data, conf_impact, int_impact, avail_impact = (
    np.array(values)
    for values in (desc_data, conf_impact, int_impact, avail_impact)
)
for column in (desc_data, conf_impact, int_impact, avail_impact):
    print(column.shape)

(674,)
(674,)
(674,)
(674,)


In [23]:
# Combined dataset: every description with all three impact labels.
# The 'Confidentiality Impact' key previously carried a trailing space, which
# made the column name differ from the one used in the per-dimension CSV.
csv_data = {
    'Description': desc_data,
    'Confidentiality Impact': conf_impact,
    'Integrity Impact': int_impact,
    'Availability Impact': avail_impact,
}
df = pd.DataFrame(csv_data)
# `info` is a method: without the call the line was a silent no-op.
df.info()
df.to_csv("desc_impact.csv")

In [24]:
# Confidentiality dataset: description text paired with its confidentiality label.
csv_data = {'Description': desc_data, 'Confidentiality Impact': conf_impact}
df = pd.DataFrame(csv_data)
# `info` is a method: without the call the line was a silent no-op.
df.info()
df.to_csv("conf_impact.csv")

In [25]:
# Integrity dataset: description text paired with its integrity label.
csv_data = {'Description': desc_data, 'Integrity Impact': int_impact}
df = pd.DataFrame(csv_data)
# `info` is a method: without the call the line was a silent no-op.
df.info()
df.to_csv("int_impact.csv")

In [26]:
# Availability dataset: description text paired with its availability label.
csv_data = {'Description': desc_data, 'Availability Impact': avail_impact}
df = pd.DataFrame(csv_data)
# `info` is a method: without the call the line was a silent no-op.
df.info()
df.to_csv("avail_impact.csv")

In [27]:
# One zero-initialised impact counter per CWE id in `vuln_types`; each id gets
# its own fresh inner dict.
vuln_impacts = {
    cwe_id: {'Confidentiality': 0, 'Integrity': 0, 'Availability': 0}
    for cwe_id in vuln_types
}

In [28]:
# Directory that the fitted vectorizer and models are written to.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
file_path = "C:/Users/kisho/Documents/GitHub/Risk_Quantification/Risk_Quantifier/Criteria_Impact"
import joblib


def dump(model, filename):
    """Serialize ``model`` to ``filename`` with joblib.

    The target is opened in binary write mode (replacing any existing file)
    and is closed again once the object has been written, even if
    serialization raises.
    """
    handle = open(filename, "wb")
    try:
        joblib.dump(model, handle)
    finally:
        handle.close()

In [29]:
import pandas as pd
import numpy as np
import re
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

# load the dataset
# These CSVs are written with DataFrame.to_csv, whose default encoding is
# UTF-8. Reading them back as latin-1 silently turns every non-ASCII character
# into mojibake before the text reaches the vectorizer, so decode as UTF-8.
conf_data = pd.read_csv("conf_impact.csv", encoding='utf-8')
int_data = pd.read_csv("int_impact.csv", encoding='utf-8')
avail_data = pd.read_csv("avail_impact.csv", encoding='utf-8')


In [30]:
# Bag-of-words vectorizer (default settings) used to turn the 'Description'
# text into token-count features.
cv = CountVectorizer()


In [31]:
# Fit the vocabulary exactly once, on the descriptions of all three datasets.
# Re-fitting the same vectorizer per dataset (as before) means only the last
# fit is persisted in cv.pkl, so the confidentiality and integrity models would
# match the saved vocabulary only if all three corpora happen to be identical.
all_descriptions = pd.concat(
    [conf_data['Description'], int_data['Description'], avail_data['Description']],
    ignore_index=True,
)
cv.fit(all_descriptions)

# Confidentiality Data
X_conf = cv.transform(conf_data['Description'])
y_conf = conf_data['Confidentiality Impact']
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_conf, y_conf, test_size=0.2, random_state=42)

# Integrity Data
X_int = cv.transform(int_data['Description'])
y_int = int_data['Integrity Impact']
X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(X_int, y_int, test_size=0.2, random_state=42)

# Availability Data
X_avail = cv.transform(avail_data['Description'])
y_avail = avail_data['Availability Impact']
X_train_a, X_test_a, y_train_a, y_test_a = train_test_split(X_avail, y_avail, test_size=0.2, random_state=42)

# Persist the single fitted vectorizer shared by all three models.
dump(cv , filename=f"{file_path}/cv.pkl")


In [32]:
# Build a soft-voting ensemble of a random forest, a decision tree and a
# k-nearest-neighbours classifier (no Naive Bayes model is involved here).
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier , VotingClassifier

# Seed the stochastic estimators so that re-running the notebook reproduces
# the same models and accuracy figures (42 matches the train/test split seed).
clf1 = RandomForestClassifier(n_estimators=30, max_depth=5, random_state=42)
clf2 = DecisionTreeClassifier(max_depth=10, random_state=42)
# NOTE(review): n_neighbors=100 needs at least 100 training samples at
# prediction time — confirm this holds if the dataset shrinks.
clf3 = KNeighborsClassifier(n_neighbors=100)

# voting='soft' averages the members' predicted class probabilities.
ensemble_desc = VotingClassifier(estimators=[('clf1', clf1), ('clf2', clf2), ('clf3', clf3)], voting='soft')



In [33]:
# Confidentiality: fit on the training split, predict the held-out split,
# persist the fitted ensemble, then report accuracy.
y_ens = ensemble_desc.fit(X_train_c, y_train_c).predict(X_test_c)
dump(ensemble_desc, filename=f"{file_path}/ens_conf.pkl")

conf_accuracy = accuracy_score(y_test_c, y_ens)
print("[+] ACCURACY \n")
print("Ensembled Classifier for Confidentiality : ", conf_accuracy)

[+] ACCURACY 

Ensembled Classifier for Confidentiality :  0.9703703703703703


In [34]:
# Integrity: fit on the training split, predict the held-out split,
# persist the fitted ensemble, then report accuracy.
y_ens = ensemble_desc.fit(X_train_i, y_train_i).predict(X_test_i)
dump(ensemble_desc, filename=f"{file_path}/ens_int.pkl")

int_accuracy = accuracy_score(y_test_i, y_ens)
print("[+] ACCURACY \n")
print("Ensembled Classifier for Integrity : ", int_accuracy)

[+] ACCURACY 

Ensembled Classifier for Integrity :  0.9333333333333333


In [35]:
# Availability: fit on the training split, predict the held-out split,
# persist the fitted ensemble, then report accuracy.
y_ens = ensemble_desc.fit(X_train_a, y_train_a).predict(X_test_a)
dump(ensemble_desc, filename=f"{file_path}/ens_avail.pkl")

avail_accuracy = accuracy_score(y_test_a, y_ens)
print("[+] ACCURACY \n")
print("Ensembled Classifier for Availability : ", avail_accuracy)

[+] ACCURACY 

Ensembled Classifier for Availability :  0.9555555555555556


In [36]:
# Free-text description to classify.
new_description = 'This attack is an example of Blind SQL Injection, where the attacker attempts to extract sensitive information from the database by modifying the original query in a way that causes the application to behave differently based on whether the injected condition is true or false, and using conditional queries to extract information. In this case, the attacker is attempting to extract the password character by character from a table called "admin", using the "substring" function and a conditional query to compare each character to the value "7"'
# Vectorize with the already-fitted CountVectorizer (transform only, no re-fit).
new_X = cv.transform([new_description])
print(new_X)
# Load the models from the directory the training cells wrote them to.
# Bare relative names only resolved when the working directory happened to be
# that same folder.
conf_model = joblib.load(f"{file_path}/ens_conf.pkl")
int_model = joblib.load(f"{file_path}/ens_int.pkl")
avail_model = joblib.load(f"{file_path}/ens_avail.pkl")
# predict() returns one label per input row; there is a single row here.
prediction = conf_model.predict(new_X)[0]
print('The predicted confidentiality impact for the given description is:', prediction)


  (0, 239)	1
  (0, 294)	1
  (0, 297)	2
  (0, 341)	1
  (0, 380)	1
  (0, 382)	2
  (0, 426)	1
  (0, 455)	1
  (0, 496)	2
  (0, 531)	1
  (0, 537)	1
  (0, 638)	1
  (0, 741)	1
  (0, 951)	1
  (0, 989)	3
  (0, 1001)	1
  (0, 1077)	2
  (0, 1086)	1
  (0, 1255)	2
  (0, 1270)	2
  (0, 1275)	1
  (0, 1277)	1
  (0, 1324)	3
  (0, 1620)	1
  (0, 1711)	1
  (0, 1717)	1
  (0, 1741)	1
  (0, 1805)	1
  (0, 2011)	1
  (0, 2012)	2
  (0, 2222)	1
  (0, 2318)	1
  (0, 2372)	1
  (0, 2409)	1
  (0, 2448)	1
  (0, 2449)	9
  (0, 2464)	2
  (0, 2482)	6
  (0, 2511)	1
  (0, 2606)	2
  (0, 2634)	1
  (0, 2690)	1
  (0, 2713)	1
The predicted confidentiality impact for the given description is: HIGH
