In [1]:
import requests
import pandas as pd

# ✅ Corrected CVE API Endpoint
CVE_API_URL = "https://services.nvd.nist.gov/rest/json/cves/2.0"

# Set headers to properly request data
headers = {"Accept": "application/json"}

# Fetch CVE Data
response = requests.get(CVE_API_URL, headers=headers)

# Check if API request was successful
if response.status_code == 200 and response.text.strip():
    try:
        cve_data = response.json()

        # Extract Relevant Information
        cve_list = []
        for item in cve_data.get('vulnerabilities', []):
            cve_id = item['cve']['id']
            description = item['cve']['descriptions'][0]['value']
            severity = item['cve'].get('metrics', {}).get('cvssMetricV2', [{}])[0].get('baseSeverity', 'Unknown')

            cve_list.append({"CVE_ID": cve_id, "Description": description, "Severity": severity})

        # Convert to DataFrame for AI Training
        cve_df = pd.DataFrame(cve_list)
        cve_df.to_csv("cve_data.csv", index=False)
        print("✅ CVE Data Successfully Fetched & Saved!")

    except requests.exceptions.JSONDecodeError:
        print("⚠ Error: Failed to decode JSON response!")
else:
    print(f"⚠ Error: Received unexpected response ({response.status_code})")

✅ CVE Data Successfully Fetched & Saved!


In [2]:
import pandas as pd
import numpy as np
import re
from sklearn.preprocessing import LabelEncoder

# Load the CVE dataset
df = pd.read_csv("cve_data.csv")

# ❌ Remove all rows with unknown severity
df = df[df["Severity"] != "Unknown"]

# Continue with your severity mapping
severity_mapping = {
    "Critical": 4,
    "High": 3,
    "Medium": 2,
    "Low": 1
}
df["Severity_Level"] = df["Severity"].map(severity_mapping)


# Extract keywords from descriptions for AI model training
def extract_keywords(description):
    words = re.findall(r'\b\w+\b', description.lower())  # Tokenize words
    important_words = [word for word in words if len(word) > 3]  # Keep meaningful words
    return " ".join(important_words)

df["Processed_Description"] = df["Description"].apply(extract_keywords)

# Save the preprocessed data
df.to_csv("processed_cve_data.csv", index=False)
print("✅ CVE Data Successfully Preprocessed & Saved!")

✅ CVE Data Successfully Preprocessed & Saved!


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau



# Load preprocessed CVE dataset
df = pd.read_csv("cve_data.csv")

# Convert severity labels to numerical values
# Normalize and map severity
df["Severity"] = df["Severity"].str.strip().str.capitalize()
severity_mapping = {"Critical": 4, "High": 3, "Medium": 2, "Low": 1}
df["Severity_Level"] = df["Severity"].map(severity_mapping)

# Drop rows where severity still couldn't be mapped
df = df.dropna(subset=["Severity_Level"])
df["Severity_Level"] = df["Severity_Level"].astype(int)


# Select features and labels
X = df.drop(columns=["CVE_ID", "Description", "Severity"])
y = df["Severity_Level"]

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [4]:
print("Unique Severity Values:", df["Severity_Level"].unique())  # Should be 0, 1, 2, 3, 4

Unique Severity Values: [3 2 1]


In [5]:
df["Severity_Level"] = df["Severity_Level"].fillna(0).astype(int)

In [6]:
df["Severity_Level"] = df["Severity_Level"].apply(lambda x: min(max(x, 0), 4))  # Clamp values to [0, 4]

In [7]:
y_train = np.array(y_train, dtype=np.int32)
y_test = np.array(y_test, dtype=np.int32)

In [8]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Load the CVE dataset
df = pd.read_csv("cve_data.csv")

# Map severity to numerical labels
severity_mapping = {"Critical": 4, "High": 3, "Medium": 2, "Low": 1, "Unknown": 0}
df["Severity_Level"] = df["Severity"].map(severity_mapping)
df["Severity_Level"] = df["Severity_Level"].fillna(0).astype(int)

# Use Description for feature engineering
df["Description"] = df["Description"].fillna("")
tfidf = TfidfVectorizer(max_features=500)  # limit to top 500 words
X_tfidf = tfidf.fit_transform(df["Description"]).toarray()

# Labels
y = df["Severity_Level"].values

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

# Define neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(4, activation='softmax')  # Only 4 classes now: Low to Critical
])

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks setup
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# Compute class weights
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = dict(enumerate(class_weights))

# Train model with callbacks and class weights (only once!)
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,  # Train for 50 epochs
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    class_weight=class_weights_dict  # Add class weights
)

# Print training history
print(f"Training completed. Epochs run: {len(history.history['loss'])}")

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\n✅ Final Loss: {test_loss:.4f} | Accuracy: {test_acc:.4f}")
print("✅ AI Model Training Complete!")

Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.8891 - loss: 0.9736 - val_accuracy: 1.0000 - val_loss: 0.0031 - learning_rate: 0.0010
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0021 - val_accuracy: 1.0000 - val_loss: 3.5035e-04 - learning_rate: 0.0010
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 5.6508e-04 - val_accuracy: 1.0000 - val_loss: 2.2039e-04 - learning_rate: 0.0010
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 3.5358e-04 - val_accuracy: 1.0000 - val_loss: 1.5060e-04 - learning_rate: 0.0010
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 2.9180e-04 - val_accuracy: 1.0000 - val_loss: 1.0574e-04 - learning_rate: 0.0010
Epoch 6/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [10]:
import joblib # Import the joblib library

# Evaluate model accuracy
loss, accuracy = model.evaluate(X_test, y_test)
print(f"✅ Model Accuracy: {accuracy:.2f}")
print(f"📉 Model Loss: {loss:.4f}")
joblib.dump(tfidf, "tfidf_vectorizer.pkl")
model.save("cve_severity_model.h5")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 2.3884e-05 




✅ Model Accuracy: 1.00
📉 Model Loss: 0.0000


In [14]:
import requests
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
import random

# Load vectorizer and model
tfidf = joblib.load("tfidf_vectorizer.pkl")
model = tf.keras.models.load_model("cve_severity_model.h5")

# Load latest CVE data
df = pd.read_csv("latest_cve_data.csv")

# Drop empty descriptions (if any)
df = df[df["Description"].notnull()]

# Randomly select one CVE sample
# random_index = random.randint(0, len(df) - 1)
# sample = df.iloc[random_index:random_index+1]
sample = df.iloc[[0]] # Select the first sample

# Vectorize description using the loaded tfidf
X_new = tfidf.transform(sample["Description"]).toarray()

# Predict
predictions = model.predict(X_new)
predicted_index = np.argmax(predictions[0])  # Get index of highest probability
confidence = predictions[0][predicted_index] # Confidence of prediction

# Severity labels (assuming your model outputs 0-3 for Low-Critical)
severity_labels = ["Unknown", "Low", "Medium", "High", "Critical"]  # Adjust labels if needed
label = severity_labels[predicted_index]

print(f"🔒 Predicted Severity: {label} (Confidence: {confidence:.2f})")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
🔒 Predicted Severity: Unknown (Confidence: 1.00)


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
import random

# Load vectorizer and model
tfidf = joblib.load("tfidf_vectorizer.pkl")
X_new = tfidf.transform(df["Description"]).toarray()
model = tf.keras.models.load_model("cve_severity_model.h5")

# Load latest CVE data
df = pd.read_csv("latest_cve_data.csv")

# Drop empty descriptions (if any)
df = df[df["Description"].notnull()]

# Randomly select one CVE sample
random_index = random.randint(0, len(df) - 1)
sample = df.iloc[random_index:random_index+1]

# Vectorize description
X_new = tfidf.transform(sample["Description"]).toarray()

# Predict
predictions = model.predict(X_new)
predicted_index = np.argmax(predictions[0])
confidence = predictions[0][predicted_index]

# Severity labels
severity_labels = ["Low", "Medium", "High", "Critical"]
label = severity_labels[predicted_index]


print(f"🔒 Predicted Severity: {label}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
🔒 Predicted Severity: Low
