In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

In [None]:
# Load dataset (replace with actual dataset)
# The path is named separately so it is easy to spot and swap out.
dataset_csv = 'medical_data.csv'  # Example dataset file
data = pd.read_csv(dataset_csv)

In [None]:
# Preprocessing
# Encode the target into its own Series rather than overwriting
# data['Disease']: re-running this cell would otherwise refit the encoder on
# already-encoded integers, and le.inverse_transform would then return numbers
# instead of the original disease labels.
le = LabelEncoder()
y = pd.Series(le.fit_transform(data['Disease']), index=data.index, name='Disease')
X = data.drop(columns=['Disease'])

# Split BEFORE scaling so the scaler's mean/variance come from the training
# rows only; fitting on the full dataset leaks test-set statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on training data only
X_test = scaler.transform(X_test_raw)        # reuse training statistics

# Kept so any cell that still refers to X_scaled continues to work; it is the
# full feature matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)


In [None]:
# Hyperparameter tuning using GridSearchCV
# SVC(probability=True) and RandomForestClassifier both draw random numbers
# while fitting, so they are seeded with the same value used for the
# train/test split (42); otherwise a Restart & Run All can select different
# "best" models and produce different metrics on every run.

# SVM: probability=True is required because predict_proba is used later for ROC.
param_grid_svm = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
grid_svm = GridSearchCV(SVC(probability=True, random_state=42), param_grid_svm, cv=10)
grid_svm.fit(X_train, y_train)

# KNN: deterministic, no seed needed.
param_grid_knn = {'n_neighbors': [3, 5, 7]}
grid_knn = GridSearchCV(KNeighborsClassifier(), param_grid_knn, cv=10)
grid_knn.fit(X_train, y_train)

# Random forest: seeded so the bootstrap samples and feature subsets repeat.
param_grid_rf = {'n_estimators': [50, 100, 150]}
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=10)
grid_rf.fit(X_train, y_train)

In [None]:
# Best models
# Pull the refitted winning estimator out of each finished grid search.
svm_model, knn_model, rf_model = (
    search.best_estimator_ for search in (grid_svm, grid_knn, grid_rf)
)

In [None]:
# Predictions
# Score the held-out test split with each tuned model, in SVM / KNN / RF order.
y_pred_svm, y_pred_knn, y_pred_rf = (
    estimator.predict(X_test) for estimator in (svm_model, knn_model, rf_model)
)


In [None]:
# Classification reports
# One precision/recall/F1 table per model, printed in SVM / KNN / RF order.
for heading, predictions in (
    ("SVM Classification Report:\n", y_pred_svm),
    ("KNN Classification Report:\n", y_pred_knn),
    ("Random Forest Classification Report:\n", y_pred_rf),
):
    print(heading, classification_report(y_test, predictions))


In [None]:
# Confusion matrices
# Use one shared label set for all three matrices. Without `labels=`,
# confusion_matrix builds its axes from the labels present in that particular
# (y_test, y_pred) pair, so panels could differ in shape and the positional
# heatmap ticks would not identify the actual classes.
cm_labels = np.unique(
    np.concatenate([np.asarray(y_test), y_pred_svm, y_pred_knn, y_pred_rf])
)
cm_class_names = le.inverse_transform(cm_labels)  # encoded ids -> original labels

fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for ax, y_pred, title in zip(axes,
                             [y_pred_svm, y_pred_knn, y_pred_rf],
                             ['SVM', 'KNN', 'Random Forest']):
    sns.heatmap(confusion_matrix(y_test, y_pred, labels=cm_labels),
                annot=True, fmt='d', cmap='Blues',
                xticklabels=cm_class_names, yticklabels=cm_class_names, ax=ax)
    ax.set_title(f'{title} Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
fig.tight_layout()
plt.show()

In [None]:
# ROC-AUC curves
# roc_curve only accepts binary targets. With exactly two classes the
# probability of the second class is used, as before. With more classes the
# targets are one-hot encoded against model.classes_ and flattened together
# with the probability matrix, giving a micro-averaged one-vs-rest curve
# instead of a ValueError.
plt.figure(figsize=(8, 6))
y_true = np.asarray(y_test)
for model, label in zip([svm_model, knn_model, rf_model], ['SVM', 'KNN', 'Random Forest']):
    proba = model.predict_proba(X_test)
    classes = model.classes_  # column order of predict_proba
    if len(classes) == 2:
        y_scores = proba[:, 1]
        fpr, tpr, _ = roc_curve(y_true, y_scores, pos_label=classes[1])
        curve_label = f'{label} (AUC: {auc(fpr, tpr):.2f})'
    else:
        # One column per class, 1 where the sample belongs to that class.
        y_onehot = (y_true[:, None] == classes[None, :]).astype(int)
        y_scores = proba.ravel()
        fpr, tpr, _ = roc_curve(y_onehot.ravel(), y_scores)
        curve_label = f'{label} (micro-avg AUC: {auc(fpr, tpr):.2f})'
    plt.plot(fpr, tpr, label=curve_label)
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC-AUC Curve')
plt.legend()
plt.show()

In [None]:
# Function to recommend medicine
def recommend_medicine(symptoms):
    """Predict the disease for one patient and return a recommendation string.

    Parameters
    ----------
    symptoms : array-like
        A single row of feature values. It is reshaped to ``(1, n_features)``
        and passed through the fitted ``scaler``, so the values must be in the
        same order as the columns the scaler was fitted on.

    Returns
    -------
    str
        ``"Recommended treatment for <disease>"``, where ``<disease>`` is the
        random-forest prediction decoded with ``le``.

    Raises
    ------
    ValueError
        If the number of values does not match the number of features the
        scaler was fitted on.
    """
    features = np.asarray(symptoms).reshape(1, -1)

    # Fail early with a readable message instead of a deep library traceback.
    expected = getattr(scaler, "n_features_in_", None)
    if expected is not None and features.shape[1] != expected:
        raise ValueError(
            f"Expected {expected} feature values, got {features.shape[1]}"
        )

    # If the scaler was fitted on a DataFrame, give it a DataFrame with the
    # same column names so it does not warn about missing feature names.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None:
        features = pd.DataFrame(features, columns=feature_names)

    input_data = scaler.transform(features)
    prediction = rf_model.predict(input_data)
    disease = le.inverse_transform(prediction)[0]
    return f"Recommended treatment for {disease}"