# Kidney Disease

## Imports

In [None]:
import sys
import os

# Make the project's `src` directory importable so the local helper modules
# used by this notebook can be found. The membership check keeps the cell
# idempotent: re-running it does not append the same path a second time.
_src_dir = os.path.abspath('../src')
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import seaborn as sns
from dea_proccessing import get_df_info, label_encode_categorical_columns, one_shot_encode_categorical_columns, filtered_df
from graph_func import gender_boxplot_graph

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the raw dataset and discard the "DoctorInCharge" column in a single
# expression, so the frame bound to `kidney_df` is never mutated in place.
kidney_df = (
    pd.read_csv("../data/Chronic_Kidney_Disease_data.csv")
    .drop(columns=["DoctorInCharge"])
)

# Preview the first five rows.
kidney_df.head(5)

## Information

In [None]:
# Summarise the loaded frame with the project helper imported from `dea_proccessing`.
# NOTE(review): the helper's output is not visible from this notebook —
# presumably a dtype / missing-value overview; confirm in the module.
get_df_info(kidney_df)

### Visualize data 

#### Calculate the correlation matrix

In [None]:
# Pairwise correlation between all columns of the dataset.
kidney_df_corr_matrix = kidney_df.corr()

# Correlation of every column with the target, rounded to two decimals and
# sorted ascending; computed once and reused for the filter below.
diagnosis_corr_sorted = kidney_df_corr_matrix["Diagnosis"].round(2).sort_values()

# Names of the columns whose rounded correlation with "Diagnosis" is >= 0.
# NOTE: despite the `_matrix` suffix this is a plain list of column names.
high_corr_matrix = diagnosis_corr_sorted[diagnosis_corr_sorted >= 0].index.to_list()

# Names of the 30 columns with the lowest (unrounded) correlation with "Diagnosis".
low_corr_matrix = (
    kidney_df_corr_matrix["Diagnosis"]
    .sort_values(ascending=True)
    .index.to_list()[:30]
)


In [None]:
# Display the full pairwise correlation matrix of `kidney_df`.
kidney_df_corr_matrix

##### Heatmap

In [None]:
# Hand-picked columns of interest: the target ("Diagnosis") followed by the
# features inspected in the cells below.
columns_lst = [
    "Diagnosis",
    "NauseaVomiting",
    "Smoking",
    "DietQuality",
    "HbA1c",
    "ProteinInUrine",
    "HealthLiteracy",
    "MedicationAdherence",
    "AlcoholConsumption",
    "SleepQuality",
    "SerumCreatinine",
    "ACR",
    "MedicalCheckupsFrequency",
    "QualityOfLifeScore",
    "BMI",
    "SystolicBP",
    "BUNLevels",
    "PhysicalActivity",
    "DiastolicBP",
    "FastingBloodSugar",
    "GFR",
    "Age",
]

In [None]:
# Run the project helper `filtered_df` with its default arguments and show the
# result transposed (original columns become rows).
# NOTE(review): what `filtered_df` keeps or drops by default is not visible here — confirm in `dea_proccessing`.
filtered_df(kidney_df ).T

In [None]:
# Show only the columns named in `columns_lst`.
kidney_df[columns_lst]

In [None]:
# Correlation heatmap for a positional slice of the dataset's columns.
# NOTE(review): the 6:20 slice selects columns by position, so the plotted
# features depend on the column order of the CSV.
subset_corr = filtered_df(kidney_df, columns=kidney_df.columns.to_list()[6:20]).corr()

# Boolean mask hiding the upper triangle (diagonal included) so every pair is
# drawn once. Built from an all-True array rather than from the correlation
# values: a float mask leaves any cell whose correlation is exactly 0
# unmasked, because 0.0 converts to False.
upper_triangle_mask = np.triu(np.ones_like(subset_corr, dtype=bool))

fig, ax = plt.subplots(figsize=(14, 10))
sns.heatmap(
    subset_corr,
    mask=upper_triangle_mask,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Correlation Heatmap')
plt.show()

##### Corr matrix heatmap

#### High Correlation Heatmap

In [None]:
ax: Axes
fig, ax = plt.subplots(figsize=(20, 16))

# Correlation table restricted to the columns listed in `high_corr_matrix`;
# computed once and shared by the mask and the plot.
high_corr = filtered_df(kidney_df, columns=high_corr_matrix).corr()

# Boolean mask hiding the upper triangle (diagonal included). Built from an
# all-True array rather than from the correlation values: a float mask leaves
# any cell whose correlation is exactly 0 unmasked.
upper_triangle_mask = np.triu(np.ones_like(high_corr, dtype=bool))

# Custom diverging colormap.
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# NOTE(review): selecting `[high_corr_matrix]` reorders the columns only; if
# `filtered_df` returns the columns in a different order than the list, rows
# and columns will not line up with the triangular mask — confirm.
sns.heatmap(
    high_corr[high_corr_matrix],
    mask=upper_triangle_mask,
    annot=True,
    cmap=cmap,
    fmt=".2f",
    square=True,
    vmax=0.2,
    linewidths=0.5,
    cbar_kws={"shrink": .5},
    ax=ax,
)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Peek at ten randomly chosen rows. The fixed seed keeps the displayed sample
# identical across "Restart & Run All" executions.
kidney_df.sample(10, random_state=42)

In [None]:
# for col in columns_lst:
#     sns.relplot(x="Age" , y=col, col="Gender",data=kidney_df, hue="Diagnosis" )

In [None]:
# Summary statistics of SerumCreatinine for the rows where Diagnosis == 0.
kidney_df.loc[kidney_df["Diagnosis"] == 0, "SerumCreatinine"].describe()

In [None]:
# Summary statistics of SerumCreatinine for the rows where Diagnosis == 1.
kidney_df.loc[kidney_df["Diagnosis"] == 1, "SerumCreatinine"].describe()

In [None]:
# Number of distinct SerumCreatinine values in the dataset, divided by 100.
# NOTE(review): the reason for the /100 scaling is not evident from the notebook — confirm intent.
kidney_df["SerumCreatinine"].unique().size/100

In [None]:
# Rows where Diagnosis == 1 (presumably the positive diagnoses).
post_kidney_df = kidney_df.loc[kidney_df["Diagnosis"].eq(1)]

# Column-wise means of those rows, grouped once per distinct Age and once per
# distinct SerumCreatinine value.
post_age_kidney_df, post_creatine_kidney_df = (
    post_kidney_df.groupby(group_key).mean()
    for group_key in ("Age", "SerumCreatinine")
)

In [None]:
# Show the per-SerumCreatinine group means transposed (one column per distinct creatinine value).
post_creatine_kidney_df.T

### Creatinine levels by Age

In [None]:
# Distinct ages present among the Diagnosis == 1 rows (the index of the per-age means).
post_age_kidney_df.index

#### Male Serum Creatinine level by age

In [None]:
# Bar chart of the mean SerumCreatinine per age for the Diagnosis == 1 rows.
# Labels spell the measured quantity "Creatinine" to match the plotted column
# (creatine is a different compound).
ax: Axes
fig, ax = plt.subplots(figsize=(16, 10), layout='constrained')

# Fixed y-range so the in-bar value labels have room.
ax.set_ylim(0, 4)

mean_creatinine = post_age_kidney_df["SerumCreatinine"].round(2)
creatinine_bars = ax.bar(
    x=post_age_kidney_df.index,
    height=mean_creatinine,
    width=.9,
    label="Serum Creatinine",
)

# Negative padding pulls each value label inside the top of its bar.
ax.bar_label(creatinine_bars, padding=-30, rotation=90, label_type="edge", color="white")

ax.set_title("Creatinine level by age")
ax.set_xlabel("Age")
ax.set_ylabel("Serum Creatinine")

# Major ticks every five years; a minor tick for every age present in the data.
ax.set_xticks(np.arange(20, 91, 5))
ax.set_xticks(post_age_kidney_df.index, minor=True)

ax.legend(loc="upper right")

plt.show()

In [None]:
# Distinct SerumCreatinine values among the Diagnosis == 1 rows (the index of the per-value means).
post_creatine_kidney_df.index

### Creatinine levels

In [None]:

# Line plot of the mean SerumCreatinine per age for the Diagnosis == 1 rows.
ax: Axes
fig, ax = plt.subplots(figsize=(12, 8))

ax.plot(post_age_kidney_df.index, "SerumCreatinine", data=post_age_kidney_df)

# Label the figure so it can be read on its own.
ax.set_title("Mean serum creatinine by age")
ax.set_xlabel("Age")
ax.set_ylabel("Serum Creatinine")

# `plt.show` has to be called; a bare `plt.show` only references the function
# and leaves its repr as the cell output.
plt.show()



In [None]:

# Human-readable gender label. Gender is encoded 0 = male, 1 = female — the
# encoding the male/female split in this notebook also relies on — so 0 maps
# to "Male". Any other value falls back to "Female", as before.
kidney_df["Gender_label"] = np.where(kidney_df["Gender"] == 0, "Male", "Female")


In [None]:
# Display the derived "Gender_label" column.
kidney_df["Gender_label"]

In [None]:
# Split the dataset by the numeric Gender code: 0 goes to the male frame,
# 1 to the female frame.
male_kidney_df = kidney_df.loc[kidney_df["Gender"].eq(0)]
female_kidney_df = kidney_df.loc[kidney_df["Gender"].eq(1)]

# Show the male subset.
male_kidney_df

In [None]:
# First quartile of every numeric column among the Diagnosis == 1 rows.
# `numeric_only=True` matches the quantile call used to build
# `more_than_1_columns` and keeps this cell working if the frame ever carries
# a non-numeric column.
post_kidney_df.quantile(0.25, numeric_only=True)

In [None]:

# First quartile of each numeric column among the Diagnosis == 1 rows.
first_quartile = post_kidney_df.quantile(0.25, numeric_only=True)

# "Gender" plus every column whose first quartile is strictly greater than 1.
more_than_1_columns = ["Gender"] + [
    str(name) for name in first_quartile.index[first_quartile > 1]
]
print( more_than_1_columns)

# Show the Diagnosis == 1 rows restricted to those columns.
filtered_df(post_kidney_df, columns=more_than_1_columns)

In [None]:
# Columns left out of the box plot.
boxplot_excluded_columns = [
    "SleepQuality",
    "HbA1c",
    "SerumElectrolytesPotassium",
    "SerumElectrolytesPhosphorus",
    "SerumElectrolytesCalcium",
]

# Diagnosis == 1 rows restricted to `more_than_1_columns`, minus the exclusions.
boxplot_source_df = filtered_df(
    post_kidney_df,
    columns=more_than_1_columns,
    rm_columns=boxplot_excluded_columns,
)

# Horizontal box plot drawn by the project helper, called with "male".
# NOTE(review): how the helper interprets "male" is not visible here — confirm in `graph_func`.
gender_boxplot_graph(
    boxplot_source_df,
    "male",
    title="Male Positive Diagnosises",
    orient="h",
    legend=True,
)