In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
from joblib import dump
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import warnings         

# Silence every warning category for the rest of the session so the
# per-model accuracy report is not interleaved with library warnings.
warnings.simplefilter('ignore')

# Load dataset
data = pd.read_csv('veri.csv')

# Separate the target column ('Risk Level') from the feature columns
X = data.drop(columns=['Risk Level'])
y = data['Risk Level']

# One-hot encode categorical features.
# drop='first' omits the first category of each feature, and
# sparse_output=False yields a dense array that can be wrapped in a DataFrame.
categorical_features = ['Patch Update Frequency']
encoder = OneHotEncoder(sparse_output=False, drop='first')
encoded_features = encoder.fit_transform(X[categorical_features])
encoded_feature_names = encoder.get_feature_names_out(categorical_features)

# Combine encoded features with the remaining feature columns.
# pd.concat(axis=1) aligns rows by index label, so the encoded frame must carry
# the same index as X; otherwise a non-default index would misalign rows and
# introduce NaNs.
X_encoded = pd.DataFrame(
    encoded_features,
    columns=encoded_feature_names,
    index=X.index,
)
# NOTE(review): the remaining columns are presumably all numeric — confirm against the CSV.
X_numeric = X.drop(columns=categorical_features)
X_final = pd.concat([X_numeric, X_encoded], axis=1)

# Label encode the target so the class labels become integer codes
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Candidate classifiers, keyed by the display name printed in the accuracy report.
# The tree-based and neural-network estimators are seeded with 42 so that
# re-running the cell reproduces the same scores and the same saved model.
algorithms = {
    'Logistic Regression': LogisticRegression(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    # Seeded like the ensembles below: an unseeded tree can choose differently
    # between equally good splits from one run to the next.
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'AdaBoost': AdaBoostClassifier(random_state=42),
    'Extra Trees': ExtraTreesClassifier(random_state=42),
    'Support Vector Machine': SVC(),
    'Naive Bayes': GaussianNB(),
    'Neural Network': MLPClassifier(random_state=42),
}

# Fit each candidate on the training split, score it on the held-out split,
# and record the accuracy under the candidate's display name.
results = {}
for name in algorithms:
    model = algorithms[name]
    # fit() returns the estimator itself, so prediction can be chained onto it
    y_pred = model.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    print(f"{name}: Accuracy = {accuracy * 100:.2f}%")

# Pick the highest-scoring candidate; on ties max() keeps the first one in
# insertion order.
best_model_name = max(results.keys(), key=lambda candidate: results[candidate])
best_model = algorithms[best_model_name]
print(f"\nBest Model: {best_model_name} with Accuracy = {results[best_model_name] * 100:.2f}%")

# Persist the winning fitted estimator with joblib.
# NOTE(review): only the estimator is written here; the fitted `encoder` and
# `target_encoder` are not saved by this cell — confirm that consumers of the
# file can rebuild the same preprocessing.
dump(value=best_model, filename='eniyi.joblib')
print("Best model saved as eniyi.joblib.")


Logistic Regression: Accuracy = 97.00%
K-Nearest Neighbors: Accuracy = 93.00%
Decision Tree: Accuracy = 100.00%
Random Forest: Accuracy = 100.00%
Gradient Boosting: Accuracy = 100.00%
AdaBoost: Accuracy = 100.00%
Extra Trees: Accuracy = 99.00%
Support Vector Machine: Accuracy = 93.00%
Naive Bayes: Accuracy = 98.00%
Neural Network: Accuracy = 91.50%

Best Model: Decision Tree with Accuracy = 100.00%
Best model saved as eniyi.joblib.
