In [None]:
"""
This project predicts diseases based on patients' symptoms.
An SVM is used to build the model.
joblib is used to save the model files.
"""

# TRAINING

In [None]:
# importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
import joblib  


In [24]:
# Read the training set, restricted to its first 133 columns.
# Left unrestricted, pandas loads one extra column from this file.
# TODO: check the CSV for a stray trailing comma or whitespace.
data = pd.read_csv(
    "Training.csv",
    usecols=range(133),
)
 

In [25]:
# data.head(2)

In [26]:
# Split the frame column-wise: every column except the last is a symptom
# feature (X); the last column is the disease label (y).
X, y = data.iloc[:, :-1], data.iloc[:, -1]

In [27]:
# type(y)

In [28]:
# Encode the disease names as integer class labels. The fitted encoder is kept
# (and saved further down) so that predicted class codes can be mapped back to
# disease names with `inverse_transform`.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


In [29]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Build an SVC with its default hyperparameters and train it on the training
# split in one step (`fit` returns the fitted estimator itself).
svm_model = SVC().fit(X_train, y_train)

# print ("success")

In [31]:
# Score the model on the held-out split: predict the test labels, then
# compare them with the true encoded labels.
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


In [32]:
# print(f"Model Accuracy: {accuracy * 100}%")

In [22]:
# Persist the three artifacts the prediction step needs, using joblib:
# the trained model, the fitted label encoder and the list of symptom names.
joblib.dump(svm_model, "svm_disease_model.pkl")
joblib.dump(label_encoder, "disease_label_encoder.pkl") # maps a predicted class code back to its disease name
joblib.dump(X.columns.tolist(), "symptom_list.pkl") # symptom column names, used to validate new patient files
# print("Model, label encoder, and symptom list saved successfully.")

# pickle could be used for saving as well, but joblib is better suited to objects holding large amounts of data


['symptom_list.pkl']

# PREDICTION

In [33]:
# loading model, symptoms list, and encoder
def load_model_and_encodings():
    """Load the artifacts saved by the training step.

    Reads, in this order, ``svm_disease_model.pkl``,
    ``disease_label_encoder.pkl`` and ``symptom_list.pkl`` from the current
    working directory with ``joblib.load``.

    Returns
    -------
    tuple
        ``(model, label_encoder, symptom_list)``.
    """
    # NOTE(review): joblib.load unpickles its input, so these files should
    # only ever come from a trusted source (here: the training cells above).
    artifact_files = (
        "svm_disease_model.pkl",
        "disease_label_encoder.pkl",
        "symptom_list.pkl",
    )
    return tuple(joblib.load(filename) for filename in artifact_files)


In [34]:
# validation and prediction of the disease for a new patient
def predict_disease(input_file):
    """Predict the disease for the patient described in a CSV file.

    The saved model, label encoder and symptom list are loaded, the leading
    symptom columns are read from ``input_file`` and checked against the saved
    symptom list, and the prediction for the first row is decoded into a
    disease label.

    Parameters
    ----------
    input_file : str, path-like or file-like
        CSV source passed to ``pandas.read_csv``. Its leading columns must be
        the symptom columns, named and ordered exactly as in the saved symptom
        list; any further columns (e.g. one produced by a trailing comma) are
        not read.

    Returns
    -------
    The label decoded by the label encoder for the first row of the file.

    Raises
    ------
    ValueError
        If the symptom column names do not match the saved symptom list, or
        if the file contains no patient rows.
    """
    model, label_encoder, symptom_list = load_model_and_encodings()

    # Read exactly as many leading columns as there are saved symptoms, so the
    # column count stays in sync with the training data instead of relying on
    # a hard-coded number.
    patient_data = pd.read_csv(input_file, usecols=range(len(symptom_list)))

    # The model expects the same features, in the same order, as in training.
    if list(patient_data.columns) != symptom_list:
        raise ValueError("Error with symptom names. Please check and try again.")

    # Fail with a clear message rather than an opaque error further down.
    if patient_data.empty:
        raise ValueError("No patient records found in the input file.")

    prediction = model.predict(patient_data)
    # Only the first row's prediction is reported.
    return label_encoder.inverse_transform(prediction)[0]

In [36]:
# Read one patient's symptoms from Patient.csv and report the predicted
# disease; validation problems raised as ValueError are printed instead.
try:
    predicted_disease = predict_disease("Patient.csv")
except ValueError as validation_error:
    print(validation_error)
else:
    print("Predicted Disease:", predicted_disease)


Predicted Disease: Fungal infection
