In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier

data = {
    "age": [34],
    "blood_pressure": [70],
    "specific_gravity":[1.020],
    "albumin":[3.0],
    "sugar":[0.0],
    "red_blood_cells":[0],
    "pus_cell":[0],
    "pus_cell_clumps":[1],
    "bacteria":[1],
    "blood_glucose_random":[117.0],
    "blood_urea":[20.0],
    "serum_creatinine":[100],
    "sodium":[100],
    "potassium":[1],
    "haemoglobin":[11.6],
    "packed_cell_volume":[30],
    "white_blood_cell_count":[7500],
    "red_blood_cell_count":[4.6],
    "hypertension": [0],
    "diabetes_mellitus": [0],
    "coronary_artery_disease": [0],
    "appetite": [0],
    "pedal_edema": [0],
    "anaemia": [0]
}

input_data = pd.DataFrame(data)

df= pd.read_csv('kidney_disease.csv')
df.drop('id', axis = 1, inplace = True)
df.columns = ['age', 'blood_pressure', 'specific_gravity', 'albumin', 'sugar', 'red_blood_cells', 'pus_cell',
              'pus_cell_clumps', 'bacteria', 'blood_glucose_random', 'blood_urea', 'serum_creatinine', 'sodium',
              'potassium', 'haemoglobin', 'packed_cell_volume', 'white_blood_cell_count', 'red_blood_cell_count',
              'hypertension', 'diabetes_mellitus', 'coronary_artery_disease', 'appetite', 'pedal_edema',
              'anaemia', 'class']

df['packed_cell_volume'] = pd.to_numeric(df['packed_cell_volume'], errors='coerce')
df['white_blood_cell_count'] = pd.to_numeric(df['white_blood_cell_count'], errors='coerce')
df['red_blood_cell_count'] = pd.to_numeric(df['red_blood_cell_count'], errors='coerce')
cat_cols = [col for col in df.columns if df[col].dtype == 'object']
num_cols = [col for col in df.columns if df[col].dtype != 'object']

df['diabetes_mellitus'].replace(to_replace = {'\tno':'no','\tyes':'yes',' yes':'yes'},inplace=True)
df['coronary_artery_disease'] = df['coronary_artery_disease'].replace(to_replace = '\tno', value='no')
df['class'] = df['class'].map({'ckd': "yes", 'notckd': "no", 'ckd\t':"yes"})
cols = ['diabetes_mellitus', 'coronary_artery_disease', 'class']
df.isna().sum().sort_values(ascending = False)

def random_value_imputation(feature):
  random_sample = df[feature].dropna().sample(df[feature].isna().sum())
  random_sample.index = df[df[feature].isnull()].index
  df.loc[df[feature].isnull(), feature] = random_sample
    
def impute_mode(feature):
  mode = df[feature].mode()[0]
  df[feature] = df[feature].fillna(mode)

for col in num_cols:
  random_value_imputation(col)

random_value_imputation('red_blood_cells')
random_value_imputation('pus_cell')

for col in cat_cols:
  impute_mode(col)


le = LabelEncoder()

for col in cat_cols:
  df[col] = le.fit_transform(df[col])

#scaling
ind_col = [col for col in df.columns if col != 'class']
dep_col = 'class'
X = df[ind_col]
y = df[dep_col]

scaler=StandardScaler()
X['age']=scaler.fit_transform(X[['age']])
input_data['age']=scaler.transform(input_data[['age']])

#Ada Boost Model
ada = AdaBoostClassifier(n_estimators=100, random_state = 42)
ada.fit(X, y)

result = [ada.predict(input_data),ada.predict_proba(input_data)]
riskindex = result[1].reshape(-1)[1]
print(result[0],riskindex)


  #Extra trees model
extc = ExtraTreesClassifier()
extc.fit(X, y)

result = [extc.predict(input_data),extc.predict_proba(input_data)]
riskindex = result[1].reshape(-1)[1]
print(result[0],riskindex)


  #Decision Tree Model
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X, y)

result = [dtc.predict(input_data),dtc.predict_proba(input_data)]
riskindex = result[1].reshape(-1)[1]
print(result[0],riskindex)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['diabetes_mellitus'].replace(to_replace = {'\tno':'no','\tyes':'yes',' yes':'yes'},inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['age']=scaler.fit_transform(X[['age']])


[1] 0.6566671935950376
[1] 0.99
[1] 1.0
