In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
import pickle

In [5]:
df = pd.read_csv('cleaned_dataset.csv')

In [6]:
df.head()

Unnamed: 0,Sneezing,Runny_Nose,Headache,High Fever,Bodyache,Blocked_Nose,Tonsils,Throught_Iritation,Fatigue,Less_of_appetite,...,Painful urination,Frequent urge to urinate,Cloudy or strong-smelling urine,Pelvic pain,Increased Appetite,Sweating,Hair loss,Photosensitivity,Oral ulcers,Risk
0,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,High
1,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,High
2,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,High
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,High
4,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,High


In [7]:
# X contains features (all columns except 'Risk')
X = df.drop('Risk', axis=1)
# y contains the target column 'Risk'
y = df['Risk']

# Initialize the LabelEncoder and encode the target column
le = LabelEncoder()
y = le.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'SVM': SVC()
}

In [8]:
# Dictionary to store accuracy scores for each model
accuracy_scores = {}

# Iterate through the models, train, and evaluate them
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = model.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    
    # Store the accuracy score
    accuracy_scores[model_name] = accuracy
    
    # Print accuracy for this model
    print(f"{model_name} Accuracy: {accuracy:.6f}")

# Display the accuracies of all models
print("\nAll Model Accuracies:")
for model_name, accuracy in accuracy_scores.items():
    print(f"{model_name}: {accuracy:.6f}")

# Find the model with the highest accuracy
best_model_name = max(accuracy_scores, key=accuracy_scores.get)
best_model = models[best_model_name]
print(f"\nBest Model: {best_model_name} with Accuracy: {accuracy_scores[best_model_name]:.6f}")

Logistic Regression Accuracy: 0.852761
Decision Tree Accuracy: 0.950920
Random Forest Accuracy: 0.938650
XGBoost Accuracy: 0.950920
K-Nearest Neighbors Accuracy: 0.950920
SVM Accuracy: 0.914110

All Model Accuracies:
Logistic Regression: 0.852761
Decision Tree: 0.950920
Random Forest: 0.938650
XGBoost: 0.950920
K-Nearest Neighbors: 0.950920
SVM: 0.914110

Best Model: Decision Tree with Accuracy: 0.950920


Parameters: { "use_label_encoder" } are not used.



## Saving the model

In [9]:
# Save the best model to a pickle file
with open('Risk_Prediction_Model_V2.pkl', 'wb') as file:
    pickle.dump(best_model, file)

print(f"Best Model '{best_model_name}' saved as Risk_Prediction_Model_V2.pkl")


Best Model 'Decision Tree' saved as Risk_Prediction_Model_V2.pkl


## Saving the encoder

In [10]:
# Save the LabelEncoder to a pickle file
with open('Risk_LabelEncoder_V2.pkl', 'wb') as file:
    pickle.dump(le, file)

## Printing all the symptoms

In [35]:
columns_list = df.columns.tolist()

print(columns_list)

['Sneezing', 'Runny_Nose', 'Headache', 'High Fever', 'Bodyache', 'Blocked_Nose', 'Tonsils', 'Throught_Iritation', 'Fatigue', 'Less_of_appetite', 'Eye Redness', 'Lethargy', 'Dry_Cough', 'Vomiting', 'Nausea', 'Breath_difficulties', 'Painful_swallowing', 'Throaty_voice', 'Smelly_breath', 'Sleepiness', 'Aggressiveness', 'Neckache', 'Nervousness', 'Heavy Headness', 'Burning sensation of stomach', 'Eye_ pain', 'Light Sensitivity', 'Loose tools', 'Stomach ache', 'Constipation', 'Throught burning sensation', 'Sore anus', 'Back Pain', 'Flatulence', 'Radiation feel along legs', 'Muscles stiffnesses of legs', 'Cramps', 'Numbness of Legs and Feet', 'Knee Pain', 'Prolonged numbness', 'Excessive thirst', 'Frequent urination', 'Blurred vision', 'Slow healing of wounds', 'Unexplained weight loss', 'Dizziness', 'Chest pain', 'Chest Tightness', 'Wheezing', 'Difficulty falling asleep', 'Waking up during the night', 'Irritability', 'Difficulty concentrating', 'Joint pain', 'Morning stiffness', 'Swelling',