### ML-based Cyber Risk Assessment for Vulnerability Severity Predictions
A machine learning model is used to predict a vulnerability's CVSS Base Score for improved risk management. Using the scraped data which is then integrated with the dataset from NVD, CISA KEV, and EPSS, the model provides a data-driven approach to prioritize threats and streamline vulnerability assessment.

##### Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from  IPython.display import display
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

##### Data Loading and Initial Checks

In [None]:
# Load data
df = pd.read_csv('cybersec_vulnerability.csv')
df

Unnamed: 0,cve_id,base_severity,base_score,exploitability_score,impact_score,epss_score,epss_perc,cisa_kev,attack_vector,attack_complexity,privileges_required,user_interaction,scope,confidentiality_impact,integrity_impact,availability_impact,published_date
0,CVE-1999-0199,CRITICAL,9.8,3.9,5.9,0.00677,0.70539,False,NETWORK,LOW,NONE,NONE,UNCHANGED,HIGH,HIGH,HIGH,2020-10-06T13:15Z
1,CVE-2002-20001,HIGH,7.5,3.9,3.6,0.11632,0.93382,False,NETWORK,LOW,NONE,NONE,UNCHANGED,NONE,NONE,HIGH,2021-11-11T19:15Z
2,CVE-2002-2438,HIGH,7.5,3.9,3.6,0.04663,0.88880,False,NETWORK,LOW,NONE,NONE,UNCHANGED,NONE,NONE,HIGH,2021-05-18T12:15Z
3,CVE-2002-2439,HIGH,7.8,1.8,5.9,0.00137,0.34244,False,LOCAL,LOW,LOW,NONE,UNCHANGED,HIGH,HIGH,HIGH,2019-10-23T18:15Z
4,CVE-2002-2444,CRITICAL,9.8,3.9,5.9,0.00476,0.63885,False,NETWORK,LOW,NONE,NONE,UNCHANGED,HIGH,HIGH,HIGH,2019-10-28T14:15Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155847,CVE-2025-9098,MEDIUM,5.3,1.8,3.4,0.00012,0.01104,False,LOCAL,LOW,LOW,NONE,UNCHANGED,LOW,LOW,LOW,2025-08-18T01:15Z
155848,CVE-2025-9099,MEDIUM,6.3,2.8,3.4,0.00034,0.07789,False,NETWORK,LOW,LOW,NONE,UNCHANGED,LOW,LOW,LOW,2025-08-18T01:15Z
155849,CVE-2025-9108,MEDIUM,4.3,2.8,1.4,0.00026,0.05527,False,NETWORK,LOW,NONE,REQUIRED,UNCHANGED,NONE,LOW,NONE,2025-08-18T06:15Z
155850,CVE-2025-9109,LOW,3.7,2.2,1.4,0.00027,0.05739,False,NETWORK,HIGH,NONE,NONE,UNCHANGED,LOW,NONE,NONE,2025-08-18T06:15Z


In [None]:
#summary statistics

df.describe()

Unnamed: 0,base_score,exploitability_score,impact_score,epss_score,epss_perc
count,155852.0,155852.0,155852.0,155852.0,155852.0
mean,7.142007,2.691608,4.304447,0.027664,0.449421
std,1.701806,0.939721,1.533836,0.116194,0.274225
min,1.6,0.1,1.4,1e-05,1e-05
25%,5.5,1.8,3.4,0.00072,0.223928
50%,7.5,2.8,3.6,0.00206,0.43042
75%,8.8,3.9,5.9,0.00529,0.66212
max,10.0,3.9,6.0,0.94582,1.0


In [None]:
#check total count and nulls counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155852 entries, 0 to 155851
Data columns (total 17 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   cve_id                  155852 non-null  object 
 1   base_severity           155852 non-null  object 
 2   base_score              155852 non-null  float64
 3   exploitability_score    155852 non-null  float64
 4   impact_score            155852 non-null  float64
 5   epss_score              155852 non-null  float64
 6   epss_perc               155852 non-null  float64
 7   cisa_kev                155852 non-null  bool   
 8   attack_vector           155852 non-null  object 
 9   attack_complexity       155852 non-null  object 
 10  privileges_required     155852 non-null  object 
 11  user_interaction        155852 non-null  object 
 12  scope                   155852 non-null  object 
 13  confidentiality_impact  155852 non-null  object 
 14  integrity_impact    

In [None]:
#check columns that can be encoded
for i in df.columns:
    if df[i].dtype == 'int64':
        pass
    else:
        display(df[i].value_counts())

cve_id
CVE-2025-9119     1
CVE-1999-0199     1
CVE-2002-20001    1
CVE-2002-2438     1
CVE-2002-2439     1
                 ..
CVE-2005-2351     1
CVE-2005-2350     1
CVE-2005-2349     1
CVE-2005-10003    1
CVE-2005-10002    1
Name: count, Length: 155852, dtype: int64

base_severity
MEDIUM      67272
HIGH        61680
CRITICAL    24153
LOW          2747
Name: count, dtype: int64

base_score
9.8    21132
7.5    17519
8.8    16618
7.8    15727
5.5    11620
       ...  
3.0        5
1.8        3
2.9        2
1.6        2
1.9        1
Name: count, Length: 81, dtype: int64

exploitability_score
3.9    45861
2.8    45086
1.8    29995
2.3     9912
1.2     5501
1.7     3376
2.2     3294
0.8     2871
1.6     1661
0.9     1634
1.0     1419
3.1     1290
2.1     1110
2.0      675
1.3      566
0.5      349
1.5      343
0.7      300
2.5      224
1.1      130
0.6       97
0.3       81
1.4       45
0.4       23
0.2        6
0.1        3
Name: count, dtype: int64

impact_score
5.9    63296
3.6    42448
2.7    24695
1.4    12101
5.2     4552
6.0     2323
2.5     1990
4.0     1272
3.4     1113
4.2      860
4.7      597
5.8      240
3.7      157
5.5      154
5.3       54
Name: count, dtype: int64

epss_score
0.00058    1028
0.00037     995
0.00039     959
0.00041     937
0.00047     790
           ... 
0.37251       1
0.85917       1
0.22095       1
0.28831       1
0.15766       1
Name: count, Length: 12992, dtype: int64

epss_perc
0.47151    534
0.46629    507
0.42983    316
0.17877    311
0.11105    215
          ... 
0.70962      1
0.20389      1
0.20387      1
0.07340      1
0.06812      1
Name: count, Length: 63194, dtype: int64

cisa_kev
False    154889
True        963
Name: count, dtype: int64

attack_vector
NETWORK             114452
LOCAL                36402
ADJACENT_NETWORK      3460
PHYSICAL              1538
Name: count, dtype: int64

attack_complexity
LOW     148386
HIGH      7466
Name: count, dtype: int64

privileges_required
NONE    90209
LOW     52391
HIGH    13252
Name: count, dtype: int64

user_interaction
NONE        104193
REQUIRED     51659
Name: count, dtype: int64

scope
UNCHANGED    126264
CHANGED       29588
Name: count, dtype: int64

confidentiality_impact
HIGH    88179
LOW     34410
NONE    33263
Name: count, dtype: int64

integrity_impact
HIGH    75476
NONE    47106
LOW     33270
Name: count, dtype: int64

availability_impact
HIGH    88295
NONE    63992
LOW      3565
Name: count, dtype: int64

published_date
2018-07-09T06:29Z    322
2018-06-11T21:29Z    318
2019-09-27T19:15Z    262
2025-02-26T07:01Z    257
2019-12-18T18:15Z    249
                    ... 
2025-08-11T15:15Z      1
2025-08-11T09:15Z      1
2025-08-11T07:15Z      1
2025-08-11T03:15Z      1
2025-08-10T14:15Z      1
Name: count, Length: 26822, dtype: int64

##### Feature Engineering

##### Model Building and Training

##### Evaluation and Iteration