In [59]:
import pandas as pd
import numpy as np
import pickle

In [60]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

In [61]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [62]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier

In [63]:
from sklearn.metrics import accuracy_score

In [64]:
df=pd.read_csv('D:/Working-project/finalData.csv')

SEPARATE FEATURES AND TARGET


In [65]:
target_column = 'Risk Level'
x = df.drop(columns=[target_column])
y = df[target_column]

ENCODE THE TARGET VARIABLE


In [66]:
encoder = LabelEncoder()
y = encoder.fit_transform(y)

HANDLE MISSING VALUES

In [67]:
imputer = SimpleImputer(strategy='mean')
x = imputer.fit_transform(x)


SPLIT THE DATA

In [68]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [69]:
with open('trained_model.pkl', 'wb') as file:
   model= pickle.dump(model, file)
print(model)

None


STANDARDIZE THE FEATURES

In [70]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

MODEL USED

In [71]:
models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Classifier": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "HistGradientBoosting": HistGradientBoostingClassifier(),
}

EVALUATING EACH MODEL

In [72]:
results = {}
for model_name, model in models.items():
    model.fit(x_train, y_train)               # Train the model
    y_pred = model.predict(x_test)            # Predict on the test set
    accuracy = accuracy_score(y_test, y_pred)    # Evaluate the model
    results[model_name] = accuracy

ACCURACY

In [73]:
for model_name, accuracy in results.items():
    print(f"{model_name}: {accuracy:.2f}")

Logistic Regression: 0.88
K-Nearest Neighbors: 0.90
Support Vector Classifier: 0.92
Decision Tree: 0.90
Random Forest: 0.91
HistGradientBoosting: 0.93


In [74]:
best_model_name = max(results, key=results.get)
best_model = models[best_model_name]
print(f"\nBest Model: {best_model_name} with accuracy {results[best_model_name]:.2f}")



Best Model: HistGradientBoosting with accuracy 0.93


In [75]:
input_data = (20,110,70,9,102,78)  # Example input
std_data = scaler.transform([input_data])
prediction = best_model.predict(std_data)

In [76]:
predicted_label = encoder.inverse_transform(prediction)
print("Prediction (decoded):", predicted_label)

Prediction (decoded): ['High']
