In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Binary disease classification with Gaussian Naive Bayes.
#
# Pipeline: load the train/test CSVs, drop incomplete rows, standardize the
# features, collapse the 'Disease' labels into healthy (0) vs. diseased (1),
# fit the model on the training split and print the test-split metrics.

# Name of the label column and the label value that marks a healthy sample.
TARGET_COLUMN = 'Disease'
HEALTHY_LABEL = 'Healthy'

# Load data
train_data = pd.read_csv("Train_data.csv")
test_data = pd.read_csv("test_data.csv")

# Data Preprocessing

# Drop rows with a missing value in any column (features or target)
train_data = train_data.dropna()
test_data = test_data.dropna()

# Separate features (X) and target variable (y)
X_train = train_data.drop(columns=TARGET_COLUMN)
y_train = train_data[TARGET_COLUMN]
# Select the test features by the training column names so both splits feed
# the scaler and the model the same features in the same order, even if the
# two CSV files list their columns differently. A feature missing from the
# test file fails here with a KeyError instead of being silently misaligned.
X_test = test_data.drop(columns=TARGET_COLUMN)[X_train.columns]
y_test = test_data[TARGET_COLUMN]

# Standardize the features. The scaler is fitted on the training split only
# and then reused for the test split, so no test statistics leak into it.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert multiclass problem into binary (diseased vs. non-diseased):
# 0 for rows labelled exactly 'Healthy', 1 for every other label.
y_train_binary = y_train.ne(HEALTHY_LABEL).astype("int64")
y_test_binary = y_test.ne(HEALTHY_LABEL).astype("int64")

# Train the model
model = GaussianNB()
model.fit(X_train_scaled, y_train_binary)

# Predict on test data
y_pred_binary = model.predict(X_test_scaled)

# Print the evaluation metrics for binary classification. Precision, recall
# and F1 are called with scikit-learn's defaults, which score label 1
# (diseased) as the positive class.
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_test_binary, y_pred_binary))


Accuracy: 0.9465020576131687
Precision: 0.9934924078091106
Recall: 0.9521829521829522
F1 Score: 0.9723991507430998
