In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Inline plots for Jupyter
%matplotlib inline

# Load dataset
# The CSV location can be overridden with the KIDNEY_DISEASE_CSV environment
# variable so the notebook also runs on other machines; the original absolute
# path is kept as the default.
import os

DATA_PATH = os.environ.get(
    "KIDNEY_DISEASE_CSV",
    "D:\\Final Year Project\\New folder\\Chronic-Kidney-Disease-Prediction-main\\Chronic-Kidney-Disease-Prediction-main\\Python Notebooks\\kidney_disease.csv",
)
data = pd.read_csv(DATA_PATH)

# Cleaning up
# Strip stray whitespace (e.g. "ckd\t") and map the labels to integers with an
# explicit mapping instead of relying on replace()'s implicit object->int
# downcast. Any label other than 'ckd'/'notckd' becomes NaN rather than
# staying behind as an unexpected string.
data['classification'] = data['classification'].str.strip().map({'ckd': 1, 'notckd': 0})
data = data.drop(columns='id')

# Fix weird wc values (tab-prefixed numbers)
data['wc'] = data['wc'].replace(["\t6200", "\t8400"], [6200, 8400])

# Keep only rows without any missing value and renumber them 0..n-1
df = data.dropna(axis=0).reset_index(drop=True)

# Correct dtypes: pcv and wc become integers, rc becomes float
df = df.astype({'pcv': int, 'wc': int, 'rc': float})

# Categorical to numeric conversion
# Each entry is (column, (label encoded as 1, label encoded as 0)).
binary_columns = [
    ("rbc", ("abnormal", "normal")),
    ("pc", ("abnormal", "normal")),
    ("pcc", ("present", "notpresent")),
    ("ba", ("present", "notpresent")),
    ("htn", ("yes", "no")),
    ("dm", ("yes", "no")),
    ("cad", ("yes", "no")),
    ("appet", ("good", "poor")),
    ("pe", ("yes", "no")),
    ("ane", ("yes", "no")),
]

# Nested {column: {label: code}} mapping, applied column-wise by replace()
dictionary = {
    column: dict(zip(labels, (1, 0)))
    for column, labels in binary_columns
}
df = df.replace(dictionary)

# The 14 model inputs (adjust this list based on heatmap results)
selected_features = [
    'age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc',
    'pot', 'wc', 'pcv', 'rc', 'htn', 'dm', 'hemo',
]

# Feature matrix and target vector
X = df.loc[:, selected_features]
y = df.loc[:, 'classification']

# Split data, then scale
# The split happens BEFORE scaling so the scaler's mean/std are learned from
# the training rows only; fitting on every row would leak test-set statistics
# into preprocessing and make the reported accuracy optimistic.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Full matrix scaled with the training statistics, kept so the name
# X_scaled remains available to any later code that expects it.
X_scaled = scaler.transform(X)

# Model training: a 20-tree random forest with a fixed seed for repeatable runs
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=20,
    random_state=42,
).fit(X_train, y_train)

# Evaluation on the held-out split
# The test split is only 20% of the rows that survived dropna, so treat the
# reported score as a rough estimate.
from sklearn.metrics import accuracy_score, confusion_matrix

predictions = model.predict(X_test)
acc = accuracy_score(y_test, predictions)
conf_mat = confusion_matrix(y_test, predictions)

accuracy_percent = round(acc * 100, 2)
print("Confusion Matrix:\n", conf_mat)
print(f"Accuracy: {accuracy_percent}%")

# Save model and scaler
# Context managers guarantee each file is flushed and closed even if
# pickling raises, instead of leaving the handle for the garbage collector.
import pickle

with open('kidney_14.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

with open('kidney_scaler_14.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


Confusion Matrix:
 [[23  0]
 [ 0  9]]
Accuracy: 100.0%
