In [None]:
import kagglehub

# Download the latest version of the dataset; `path` holds whatever
# kagglehub.dataset_download returns and is echoed below as the dataset location.
DATASET_HANDLE = "kaushil268/disease-prediction-using-machine-learning"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

In [None]:
import pandas as pd

# Load both CSV splits from the downloaded dataset directory into DataFrames.
training_data, testing_data = (
    pd.read_csv(path + "/" + csv_name)
    for csv_name in ("Training.csv", "Testing.csv")
)

# Preview the first rows of the training split as the cell output.
training_data.head()

In [None]:
# Show the column labels of the test split as the cell output.
testing_data.columns

In [None]:
# Split both frames into feature matrices (X_*) and the 'prognosis' target (y_*).
#
# 'Unnamed: 133' is not a feature and is removed before training.
# NOTE(review): pandas assigns "Unnamed: N" labels to header-less columns, so this
# is presumably a stray empty column in the CSV -- confirm against the raw file.
#
# errors='ignore' makes the drop a no-op when the column is absent. That keeps this
# cell idempotent: `training_data` is rebound here, so without it a second run of
# the cell would raise KeyError. It also lets the same drop be applied to the test
# split whether or not that file carries the stray column.
STRAY_COLUMNS = ['Unnamed: 133']

training_data = training_data.drop(columns=STRAY_COLUMNS, errors='ignore')
X_train = training_data.drop(columns='prognosis')
y_train = training_data['prognosis']

X_test = testing_data.drop(columns=STRAY_COLUMNS, errors='ignore').drop(columns='prognosis')
y_test = testing_data['prognosis']


In [None]:
# Show the column labels of the training frame as the cell output.
training_data.columns

In [20]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier, VotingClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fixed seed for the randomized estimators so the score table is reproducible
# across kernel restarts.
SEED = 42

rfmodel = RandomForestClassifier(n_estimators=100, random_state=SEED)
gbmodel = GradientBoostingClassifier(n_estimators=100, random_state=SEED)
abmodel = AdaBoostClassifier(n_estimators=100, random_state=SEED)
etmodel = ExtraTreesClassifier(n_estimators=100, random_state=SEED)
lrmodel = LogisticRegression()
svcmodel = SVC()
dtmodel = DecisionTreeClassifier(random_state=SEED)
# Hard voting: the ensemble predicts the majority class label of its members.
vcmodel = VotingClassifier(estimators=[('rf', rfmodel), ('gb', gbmodel), ('ab', abmodel), ('et', etmodel), ('lr', lrmodel), ('svc', svcmodel), ('dt', dtmodel)], voting='hard')
bcmodel = BaggingClassifier(n_estimators=100, random_state=SEED)

# Display name -> estimator. The table labels and the evaluation order both come
# from this one mapping, so adding or reordering a model cannot mislabel a row.
named_models = {
    'Random Forest': rfmodel,
    'Gradient Boosting': gbmodel,
    'AdaBoost': abmodel,
    'Extra Trees': etmodel,
    'Logistic Regression': lrmodel,
    'SVC': svcmodel,
    'Decision Tree': dtmodel,
    'Voting Classifier': vcmodel,
    'Bagging Classifier': bcmodel,
}

models = list(named_models.values())
acc_scores = []
recall_scores = []
precision_scores = []
f1_scores = []

# Fit every model on the training split and score it on the held-out test split.
# NOTE(review): with a single-label target, micro-averaged precision/recall/F1 are
# all equal to accuracy, so the four columns will match; consider average='macro'
# if per-class behaviour is what should be compared.
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc_scores.append(accuracy_score(y_test, y_pred))
    recall_scores.append(recall_score(y_test, y_pred, average='micro'))
    precision_scores.append(precision_score(y_test, y_pred, average='micro'))
    f1_scores.append(f1_score(y_test, y_pred, average='micro'))

# One row per model, in the same order as `models`.
table = pd.DataFrame({'Model': list(named_models), 'Accuracy': acc_scores, 'Recall': recall_scores, 'Precision': precision_scores, 'F1': f1_scores})

In [None]:
# Render the per-model score table.
display(table)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim


class CNN:
    """Small two-block convolutional classifier with a Keras-like fit/predict API.

    The network is built from PyTorch modules: two (3x3 conv -> ReLU -> 2x2 max-pool)
    stages with 32 and 64 filters, then a 128-unit hidden layer and a
    ``num_classes``-unit output layer. It is trained with Adam on cross-entropy loss.

    Args:
        input_shape: ``(height, width)`` for single-channel input, or
            ``(channels, height, width)`` in channel-first layout.
        num_classes: Number of target classes (size of the output layer).

    Attributes:
        model: The underlying ``nn.Sequential``; it outputs raw logits.
        history: One dict per completed epoch with ``loss``, ``val_loss`` and
            ``val_accuracy``.

    Raises:
        ValueError: If ``input_shape`` is malformed or too small to survive the
            two 2x2 pooling stages.
    """

    def __init__(self, input_shape, num_classes):
        self.input_shape = self._parse_input_shape(input_shape)
        self.num_classes = num_classes
        self.model = self.create_model(input_shape, num_classes)
        # CrossEntropyLoss applies log-softmax itself, so the model emits logits
        # and softmax is only applied in predict().
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters())
        self.history = []

    @staticmethod
    def _parse_input_shape(input_shape):
        """Normalize ``input_shape`` to a ``(channels, height, width)`` tuple."""
        dims = tuple(int(d) for d in input_shape)
        if len(dims) == 2:
            dims = (1,) + dims
        if len(dims) != 3 or min(dims) < 1:
            raise ValueError(
                "input_shape must be (height, width) or (channels, height, width) "
                f"with positive sizes, got {input_shape!r}"
            )
        return dims

    def create_model(self, input_shape, num_classes):
        """Build the convolutional network.

        Args:
            input_shape: ``(height, width)`` or ``(channels, height, width)``.
            num_classes: Size of the output layer.

        Returns:
            An ``nn.Sequential`` mapping ``(N, channels, height, width)`` inputs to
            ``(N, num_classes)`` logits.

        Raises:
            ValueError: If height or width is smaller than 4, because two 2x2
                poolings would leave no spatial positions.
        """
        channels, height, width = self._parse_input_shape(input_shape)
        # padding=1 keeps a 3x3 convolution size-preserving; each 2x2 pool halves
        # (floors) both spatial dimensions, so two pools divide them by 4.
        pooled_h, pooled_w = height // 4, width // 4
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                "height and width must both be at least 4 for two 2x2 poolings, "
                f"got {(height, width)!r}"
            )
        model = nn.Sequential(
            nn.Conv2d(channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * pooled_h * pooled_w, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )
        return model

    def _as_inputs(self, X):
        """Convert ``X`` to a float32 tensor of shape ``(N, channels, height, width)``.

        Inputs that are not already 4-D (for example flat ``(N, features)`` rows or
        ``(N, height, width)`` images) are reshaped to the configured input shape.
        4-D inputs are passed through unchanged and must be channel-first.
        """
        if hasattr(X, "to_numpy"):  # pandas DataFrame / Series
            X = X.to_numpy()
        batch = torch.as_tensor(X, dtype=torch.float32)
        if batch.dim() != 4:
            batch = batch.reshape(batch.shape[0], *self.input_shape)
        return batch

    @staticmethod
    def _as_labels(y):
        """Convert integer class ids or one-hot rows to a 1-D int64 tensor."""
        if hasattr(y, "to_numpy"):  # pandas DataFrame / Series
            y = y.to_numpy()
        labels = torch.as_tensor(y)
        if labels.dim() == 2:  # one-hot / probability rows -> class index
            labels = labels.argmax(dim=1)
        return labels.long()

    def fit(self, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
        """Train the network and report validation metrics after every epoch.

        Args:
            X_train: Training inputs (tensor, array or DataFrame).
            y_train: Training labels as integer class ids or one-hot rows.
                String labels must be encoded to integers first.
            X_test: Validation inputs.
            y_test: Validation labels, same encoding as ``y_train``.
            epochs: Number of passes over the training data.
            batch_size: Number of samples per optimizer step.

        Returns:
            None. Per-epoch metrics are appended to ``self.history`` and printed.
        """
        train_x, train_y = self._as_inputs(X_train), self._as_labels(y_train)
        val_x, val_y = self._as_inputs(X_test), self._as_labels(y_test)
        n_samples = train_x.shape[0]

        for epoch in range(epochs):
            self.model.train()
            order = torch.randperm(n_samples)  # reshuffle every epoch
            summed_loss = 0.0
            for start in range(0, n_samples, batch_size):
                idx = order[start:start + batch_size]
                self.optimizer.zero_grad()
                loss = self.loss_fn(self.model(train_x[idx]), train_y[idx])
                loss.backward()
                self.optimizer.step()
                # Weight by batch size so a short final batch does not skew the mean.
                summed_loss += loss.item() * idx.numel()
            train_loss = summed_loss / max(n_samples, 1)

            self.model.eval()
            with torch.no_grad():
                val_logits = self.model(val_x)
                val_loss = self.loss_fn(val_logits, val_y).item()
                val_acc = (val_logits.argmax(dim=1) == val_y).float().mean().item()

            self.history.append(
                {'loss': train_loss, 'val_loss': val_loss, 'val_accuracy': val_acc}
            )
            print(
                f"Epoch {epoch + 1}/{epochs} - loss: {train_loss:.4f} "
                f"- val_loss: {val_loss:.4f} - val_accuracy: {val_acc:.4f}"
            )

    def predict(self, X):
        """Return class probabilities for ``X``.

        Args:
            X: Inputs in any form accepted by ``fit``.

        Returns:
            A NumPy array of shape ``(N, num_classes)`` whose rows sum to 1.
        """
        self.model.eval()
        with torch.no_grad():
            logits = self.model(self._as_inputs(X))
        return torch.softmax(logits, dim=1).numpy()
    