In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier  # Import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from joblib import dump, load

In [6]:
def preprocess_data(data, symptoms_columns=None):
    """Collapse the per-symptom columns into a single text feature per row.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Disease`` column and every column listed in
        ``symptoms_columns``. The frame is not modified.
    symptoms_columns : list of str, optional
        Names of the columns to merge, in order. Defaults to
        ``Symptom_1`` .. ``Symptom_10``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``Combined_Symptoms`` and ``Disease``
        (in that order), duplicate rows removed, original index labels kept.

    Raises
    ------
    KeyError
        If ``Disease`` or any of the symptom columns is missing from ``data``.
    """
    if symptoms_columns is None:
        symptoms_columns = [f'Symptom_{i}' for i in range(1, 11)]

    def _join_present(values):
        # Check for missing values *before* str(): str(nan) is the truthy
        # text 'nan', which would otherwise be joined in as a fake symptom.
        texts = (str(value) for value in values if pd.notna(value))
        return ', '.join(text for text in texts if text)

    combined = [
        _join_present(row)
        for row in data[symptoms_columns].itertuples(index=False, name=None)
    ]

    # Build a fresh frame so the caller's DataFrame is left untouched.
    result = data[['Disease']].copy()
    result.insert(0, 'Combined_Symptoms', combined)
    return result.drop_duplicates()

def train_model(data):
    """Fit a bag-of-words random-forest pipeline and print its hold-out accuracy.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Combined_Symptoms`` text column (features) and a
        ``Disease`` column (labels).

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted ``CountVectorizer`` -> ``RandomForestClassifier`` pipeline.
    """
    features = data['Combined_Symptoms']
    labels = data['Disease']

    # 80/20 split with a fixed seed so the reported score is repeatable.
    text_fit, text_eval, label_fit, label_eval = train_test_split(
        features,
        labels,
        test_size=0.2,
        random_state=42,
    )

    vectorizer = CountVectorizer()
    forest = RandomForestClassifier(n_estimators=200, random_state=42)
    model = make_pipeline(vectorizer, forest)
    model.fit(text_fit, label_fit)

    # Mean accuracy on the held-out 20%.
    accuracy = model.score(text_eval, label_eval)
    print(f'Model Accuracy: {accuracy}')

    return model


def predict_disease(model, symptoms):
    """Return the model's prediction for a single symptoms value.

    Parameters
    ----------
    model
        Any object exposing ``predict(batch)`` that returns an indexable
        sequence of predictions, one per batch item.
    symptoms
        The single sample to classify; it is passed to the model wrapped
        in a one-element list.

    Returns
    -------
    The first (and only) element of the model's prediction output.
    """
    # predict() works on batches, so wrap the sample and unwrap the answer.
    single_item_batch = [symptoms]
    predictions = model.predict(single_item_batch)
    return predictions[0]

def save_model(model, filename='disease_prediction_model.joblib'):
    """Persist ``model`` to ``filename`` with joblib.

    Parameters
    ----------
    model
        The object to serialize.
    filename : str, optional
        Destination path; defaults to ``disease_prediction_model.joblib``.

    Returns
    -------
    None
    """
    dump(value=model, filename=filename)

def load_model(filename='disease_prediction_model.joblib'):
    """Read back an object previously written with joblib.

    Parameters
    ----------
    filename : str, optional
        Path of the joblib file; defaults to
        ``disease_prediction_model.joblib``.

    Returns
    -------
    The deserialized object stored in ``filename``.
    """
    # NOTE(review): joblib files are pickle-based, so loading one can run
    # arbitrary code. Only load files from a trusted source.
    restored = load(filename)
    return restored

# End-to-end demo: read the CSV, build the text feature, train, persist the
# model, reload it, and classify one line of interactively typed symptoms.
if __name__ == '__main__':
    # Load your dataset from the CSV file
    # ('dataset.csv' is a relative path, resolved against the current
    # working directory).
    dataset = pd.read_csv('dataset.csv')

    # Preprocess the data
    # (yields the Combined_Symptoms / Disease frame that train_model reads).
    preprocessed_data = preprocess_data(dataset)

    # Train the model
    # (train_model also prints the hold-out accuracy).
    trained_model = train_model(preprocessed_data)

    # Save the trained model for future use
    # (written to save_model's default filename).
    save_model(trained_model)

    # Example of how to load the model and make a prediction
    # The model is read back from disk rather than reusing trained_model,
    # so the save/load round trip is exercised as well.
    loaded_model = load_model()
    # input() blocks until the user types a line; the raw text is passed to
    # the model unchanged.
    symptoms_input = input('Enter symptoms (comma-separated): ')
    predicted_disease = predict_disease(loaded_model, symptoms_input)

    print(f'Predicted Disease: {predicted_disease}')

Model Accuracy: 1.0
Enter symptoms (comma-separated): wateirng from eyes
Predicted Disease: Bronchial Asthma
