In [23]:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load the dataset
df = pd.read_csv('kidney_disease.csv')

# Display the first few rows of the dataset
print(df.head())

# Data preprocessing
# Drop every row that has at least one missing value.
df.dropna(inplace=True)

# Encode every object-typed column (features and target alike) as integer
# codes. The fitted encoder is kept per column so codes can be mapped back to
# the original labels later.
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Split features and target variable
# NOTE(review): the head() output shows an `id` column that looks like a row
# identifier; it is currently kept as a feature - confirm whether it should be
# dropped before training.
X = df.drop('classification', axis=1)  # Replace 'classification' with your target column name
y = df['classification']  # Replace with your target column name

# Split the dataset into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training split only, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model training using Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)


print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Report the accuracy actually measured on the held-out split instead of a
# hard-coded figure.
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

   id   age    bp     sg   al   su     rbc        pc         pcc          ba  \
0   0  48.0  80.0  1.020  1.0  0.0     NaN    normal  notpresent  notpresent   
1   1   7.0  50.0  1.020  4.0  0.0     NaN    normal  notpresent  notpresent   
2   2  62.0  80.0  1.010  2.0  3.0  normal    normal  notpresent  notpresent   
3   3  48.0  70.0  1.005  4.0  0.0  normal  abnormal     present  notpresent   
4   4  51.0  80.0  1.010  2.0  0.0  normal    normal  notpresent  notpresent   

   ...  pcv    wc   rc  htn   dm  cad appet   pe  ane classification  
0  ...   44  7800  5.2  yes  yes   no  good   no   no            ckd  
1  ...   38  6000  NaN   no   no   no  good   no   no            ckd  
2  ...   31  7500  NaN   no  yes   no  poor   no  yes            ckd  
3  ...   32  6700  3.9  yes   no   no  poor  yes  yes            ckd  
4  ...   35  7300  4.6   no   no   no  good   no   no            ckd  

[5 rows x 26 columns]

Classification Report:
              precision    recall  f1-score   

In [33]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import streamlit as st

# Load the dataset
df = pd.read_csv('kidney_disease.csv')

# Data preprocessing: drop every row that has at least one missing value.
df.dropna(inplace=True)

# Encode every object-typed column (features and target alike) as integer
# codes, keeping the fitted encoder per column. The encoders are reused below
# to build the categorical input widgets and to decode the prediction.
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Split features and target variable
# NOTE(review): `id` looks like a row identifier and is currently kept as a
# feature (and therefore shown as an input field) - confirm whether it should
# be dropped.
X = df.drop('classification', axis=1)  # Replace 'classification' with your target column name
y = df['classification']  # Replace with your target column name

# Split the dataset into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training split only, then reuse for test data
# and for user-supplied samples.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model training using Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Accuracy measured on the held-out split (previously a hard-coded constant).
acc = accuracy_score(y_test, y_pred)

# Streamlit app
st.title("Kidney Disease Prediction")

# Input fields for the features
inputs = []
for column in X.columns:
    if column in label_encoders:
        # The column was label-encoded above, so its dtype is no longer
        # 'object'. Offer the original labels and convert the chosen one to
        # the integer code the model was trained on.
        encoder = label_encoders[column]
        selected_label = st.selectbox(column, options=list(encoder.classes_))
        input_value = encoder.transform([selected_label])[0]
    else:
        input_value = st.number_input(column, value=float(df[column].mean()))
    inputs.append(input_value)

# Prediction
if st.button("Predict"):
    # The scaler was fitted on a DataFrame, so pass a one-row frame with the
    # same column names and order.
    input_data = pd.DataFrame([inputs], columns=X.columns)
    input_data = scaler.transform(input_data)  # Scale the input data
    prediction = model.predict(input_data)

    # Decode the integer prediction back to the original class label
    predicted_class = label_encoders['classification'].inverse_transform(prediction)

    st.write(f"Predicted Class: {predicted_class[0]}")

# Display the classification report and accuracy score
st.subheader("Model Performance on Test Data")
st.write("\nClassification Report:")
st.text(classification_report(y_test, y_pred))

st.write("\nAccuracy Score:")
st.text(f"{acc:.4f}")


DeltaGenerator()

In [37]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load the data and discard every row that has at least one missing value.
df = pd.read_csv('kidney_disease.csv')
df.dropna(inplace=True)

# Replace each object-typed column with integer codes and remember the fitted
# encoder so the codes can be translated back to labels later.
# NOTE(review): `id` looks like a row identifier and stays in the feature
# matrix below - confirm whether that is intended.
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns
for column in categorical_columns:
    le = LabelEncoder()
    label_encoders[column] = le
    df[column] = le.fit_transform(df[column])

# Target is the 'classification' column; every other column is a feature.
y = df['classification']
X = df.drop(columns='classification')

# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the random forest on the training split.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Score the held-out split.
y_pred = model.predict(X_test)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

def predict_kidney_disease(features):
    """Predict the class label for a single sample.

    Args:
        features: Sequence of feature values, one per column of the
            module-level ``X`` and in the same order as ``X.columns``.
            Values for columns that were label-encoded during training must
            already be given as their integer codes.

    Returns:
        The predicted label, decoded with
        ``label_encoders['classification']``.

    Raises:
        ValueError: If the number of values does not match the number of
            feature columns in ``X``.
    """
    if len(features) != X.shape[1]:
        raise ValueError(f"Expected {X.shape[1]} features, but got {len(features)} features.")
    # The model was fitted on a DataFrame, so hand it a one-row frame carrying
    # the same column names instead of a bare nested list.
    sample = pd.DataFrame([list(features)], columns=X.columns)
    prediction = model.predict(sample)
    predicted_class = label_encoders['classification'].inverse_transform(prediction)
    return predicted_class[0]

if __name__ == "__main__":
    # Hard-coded demo sample: 25 values, passed positionally to
    # predict_kidney_disease (which checks the count against X's columns).
    user_input = [
        0, 1, 1, 0, 1,
        0, 0, 0, 1, 0,
        0, 0, 1, 0, 0,
        0, 0, 1, 0, 0,
        0, 0, 1, 0, 0,
    ]
    try:
        result = predict_kidney_disease(user_input)
        print("Predicted Class:", result)
    except ValueError as err:
        # A ValueError (e.g. the feature-count check) is printed rather than
        # propagated.
        print(err)



Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      1.00      1.00        23

    accuracy                           1.00        32
   macro avg       1.00      1.00      1.00        32
weighted avg       1.00      1.00      1.00        32


Accuracy Score: 1.0
Predicted Class: ckd


