# Kidney Disease

## Imports

In [None]:
import sys
import os

# Make the project's `src` directory importable so the helper modules below can be found
sys.path.append(os.path.abspath('../src'))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import seaborn as sns
from dea_proccessing import get_df_info, label_encode_categorical_columns, one_shot_encode_categorical_columns, filtered_df
from graph_func import gender_boxplot_graph

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the chronic kidney disease dataset and discard the "DoctorInCharge" column
# in a single chained expression, then preview the first five rows.
kidney_df = pd.read_csv("../data/Chronic_Kidney_Disease_data.csv").drop(
    columns=["DoctorInCharge"]
)
kidney_df.head(5)

## Information

In [None]:
# Summarize the loaded dataset with the project helper `get_df_info`
# (imported from `dea_proccessing`; its exact output is not visible in this notebook).
get_df_info(kidney_df)

### Visualize data 

#### Calculate the correlation matrix

In [None]:
# Pairwise correlation of the numeric columns. `numeric_only=True` keeps this cell
# re-runnable after the string column "Gender_label" has been added to `kidney_df`
# further down in the notebook; with all-numeric data the result is unchanged.
kidney_df_corr_matrix = kidney_df.corr(numeric_only=True)
# Column names ordered by ascending correlation with "Diagnosis"; the slice keeps
# the names from position 30 onward, i.e. the most positively correlated ones
# ("Diagnosis" itself, with self-correlation 1.0, sorts last).
high_corr_matrix = kidney_df_corr_matrix["Diagnosis"].sort_values(ascending=True).index.to_list()[30:]


##### Heatmap

In [None]:
# Columns of interest for the exploratory views below, one name per line.
columns_lst = [
    "Diagnosis",
    "NauseaVomiting",
    "Smoking",
    "DietQuality",
    "HbA1c",
    "ProteinInUrine",
    "HealthLiteracy",
    "MedicationAdherence",
    "AlcoholConsumption",
    "SleepQuality",
    "SerumCreatinine",
    "ACR",
    "MedicalCheckupsFrequency",
    "QualityOfLifeScore",
    "BMI",
    "SystolicBP",
    "BUNLevels",
    "PhysicalActivity",
    "DiastolicBP",
    "FastingBloodSugar",
    "GFR",
    "Age",
]

In [None]:
# Transposed view of `filtered_df` applied to the full dataset with its default arguments.
# NOTE(review): `filtered_df` comes from `dea_proccessing`; its default filtering is not visible here.
filtered_df(kidney_df ).T

In [None]:
# Show only the columns named in `columns_lst`.
kidney_df[columns_lst]

In [None]:
# Correlation of the columns at positions 6..19 of `kidney_df`, computed once and reused.
subset_corr = filtered_df(kidney_df, columns=kidney_df.columns.to_list()[6:20]).corr()

# Boolean mask hiding the upper triangle (diagonal included). It is built from ones
# rather than from the correlation values so a cell is hidden because of its
# position, not because its correlation happens to be non-zero.
matrix = np.triu(np.ones_like(subset_corr, dtype=bool))

plt.figure(figsize=(14, 10))
sns.heatmap(subset_corr, mask=matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()

##### High-correlation matrix heatmap

In [None]:
# Per-column maximum of the correlation matrix restricted to the `high_corr_matrix` columns.
# NOTE(review): the diagonal (self-correlation) is included in the maximum, so this
# is expected to report 1.0 for every non-constant column — confirm this is intended.
filtered_df(kidney_df,columns=high_corr_matrix ).corr().max()

In [None]:
# Correlation among the columns most positively correlated with "Diagnosis",
# computed once and reused for both the mask and the plot.
high_corr = filtered_df(kidney_df, columns=high_corr_matrix).corr()

# Boolean mask hiding the upper triangle (diagonal included). Built from ones so
# that an exact-zero correlation in the upper triangle is still hidden.
matrix = np.triu(np.ones_like(high_corr, dtype=bool))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

plt.figure(figsize=(20, 12))
# vmax=0.2 caps the colour scale so the small off-diagonal correlations remain distinguishable.
sns.heatmap(high_corr, mask=matrix, annot=True, cmap=cmap, fmt=".2f",
            square=True, vmax=0.2, linewidths=0.5, cbar_kws={"shrink": .5})
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Show 10 randomly chosen rows (no seed is set, so the selection differs between runs).
kidney_df.sample(10)

In [None]:
# for col in columns_lst:
#     sns.relplot(x="Age" , y=col, col="Gender",data=kidney_df, hue="Diagnosis" )

In [None]:
# Summary statistics of SerumCreatinine for the rows where Diagnosis == 0.
kidney_df[kidney_df["Diagnosis"] == 0]["SerumCreatinine"].describe()

In [None]:
# Summary statistics of SerumCreatinine for the rows where Diagnosis == 1.
kidney_df[kidney_df["Diagnosis"] == 1]["SerumCreatinine"].describe()

In [None]:
# Rows with a positive diagnosis (Diagnosis == 1).
post_kidney_df = kidney_df[kidney_df["Diagnosis"] == 1]
# Mean of every numeric column per age. `numeric_only=True` keeps the cell
# re-runnable once the string column "Gender_label" has been added to `kidney_df`
# later in the notebook; with all-numeric data the result is unchanged.
post_age_kidney_df = post_kidney_df.groupby("Age").mean(numeric_only=True)

In [None]:
# Mean of the per-age SerumCreatinine averages. The call parentheses are required:
# without them the cell displays the bound method instead of the number.
post_age_kidney_df["SerumCreatinine"].mean()

In [None]:
# Bar chart of the mean serum creatinine per age for rows with Diagnosis == 1.
# Labels use "Creatinine" to match the plotted "SerumCreatinine" column
# (creatine is a different compound).
ax: Axes
fig, ax = plt.subplots(layout='constrained')

fig.set_size_inches(16, 10)

# Fixed y-range of 0..4 for the chart.
ax.set(ylim=(0,4))

creatine_bar = ax.bar(x=post_age_kidney_df.index, height=post_age_kidney_df["SerumCreatinine"].round(2),width=.9, label="Serum Creatinine")

# Negative padding pulls each value label inside its bar; rotated so it fits the bar width.
ax.bar_label(creatine_bar, padding=-30,rotation=90, label_type="edge", color="white")

ax.set_title("Creatinine level by age")
ax.set_xlabel("Age")
ax.set_ylabel("Serum Creatinine")
# Major ticks every 5 years, minor ticks at every age present in the data.
ax.set_xticks(np.arange(20, 91, 5 ) )
ax.set_xticks(post_age_kidney_df.index, minor=True)

ax.legend(loc="upper right")

plt.show()

In [None]:

# Line plot of the mean serum creatinine per age for rows with Diagnosis == 1.
ax: Axes
fig, ax = plt.subplots()
fig.set_size_inches(12, 8)

# The string "SerumCreatinine" is resolved against `data`, so the y-values are that column.
ax.plot(post_age_kidney_df.index, "SerumCreatinine", data=post_age_kidney_df)
# `plt.show` must be called; the bare attribute reference never rendered the figure explicitly.
plt.show()



In [None]:
# Inspect the raw Gender column (compared against 1 and 0 in the cells below).
kidney_df["Gender"]

In [None]:

# Human-readable gender: "Male" where Gender equals 1, "Female" for every other value.
kidney_df["Gender_label"] = kidney_df["Gender"].eq(1).map({True: "Male", False: "Female"})


In [None]:
# Inspect the derived Gender_label column.
kidney_df["Gender_label"]

In [None]:
# Split the dataset by gender code: 1 -> male rows, 0 -> female rows.
male_kidney_df = kidney_df.loc[kidney_df["Gender"].eq(1)]
female_kidney_df = kidney_df.loc[kidney_df["Gender"].eq(0)]

# Display the male subset.
male_kidney_df

In [None]:

# "Gender" plus every numeric column whose minimum value is strictly greater than 1.
column_minimums = kidney_df.min(numeric_only=True)
more_than_1_columns = ["Gender", *column_minimums[column_minimums > 1].index.map(str)]
print(more_than_1_columns)

In [None]:
# Transposed view of the `more_than_1_columns` columns for the Diagnosis == 1 rows.
# NOTE(review): assumes `filtered_df(..., columns=...)` selects exactly those columns — confirm in `dea_proccessing`.
filtered_df(post_kidney_df,columns=more_than_1_columns).T

In [None]:
# Box plot for the "male" selection of the Diagnosis == 1 rows, using the imported
# `gender_boxplot_graph` helper from `graph_func`.
# NOTE(review): "SleepQuality" is passed as `rm_columns`, presumably to exclude it
# from the plot — confirm against `filtered_df` in `dea_proccessing`.
gender_boxplot_graph(filtered_df(post_kidney_df,columns=more_than_1_columns , rm_columns=["SleepQuality"]),
                    "male",
                    legend=True,
                    showmeans=True
                    )

In [None]:
from typing import Any
def gender_boxplot_graph_2(df: pd.DataFrame, gender: str = "male", **kwargs: Any):
    """Draw a boxplot of the numeric columns for one gender, annotated with quartiles.

    Rows are selected on the ``Gender`` column (``1`` for male, ``0`` for female),
    the ``Gender`` column itself is dropped, and one box is drawn per remaining
    numeric column. Each box is labelled with its Q1, median and Q3 values.

    Args:
        df (pd.DataFrame): The dataframe with the data; must contain a ``Gender`` column.
        gender (str, optional): ``"male"`` or ``"female"``; matched case-insensitively
            and ignoring surrounding whitespace. Defaults to "male".
        kwargs (Any, optional): Any argument to be passed to ``sns.boxplot``.

    Raises:
        ValueError: If ``gender`` is neither "male" nor "female". Previously any
            other value silently plotted the female rows under a misleading title.
    """
    gender_codes = {"male": 1, "female": 0}
    gender_key = gender.strip().lower()
    if gender_key not in gender_codes:
        raise ValueError(
            f"gender must be 'male' or 'female', got {gender!r}"
        )

    data = df[df["Gender"] == gender_codes[gender_key]].drop(columns="Gender")
    # quantile() below needs numeric data; restricting the frame up front also keeps
    # each annotation index aligned with the box drawn for that column.
    data = data.select_dtypes(include="number")

    ax: Axes
    _, ax = plt.subplots(figsize=(12, 20))

    sns.boxplot(data=data, ax=ax, **kwargs)
    ax.set_title(f"{gender_key.capitalize()} Graph")

    # Annotate the plot with median, Q1, Q3 labels
    for position, column in enumerate(data.columns):
        column_data = data[column].dropna()
        if column_data.empty:
            # No values to summarise: the quartiles would be NaN and the text could not be placed.
            continue
        q1, median, q3 = column_data.quantile([0.25, 0.5, 0.75])

        ax.text(position, median, f'Median: {median:.2f}', horizontalalignment='center', size='small', color='black', weight='semibold')
        ax.text(position, q1, f'Q1: {q1:.2f}', horizontalalignment='center', size='small', color='blue', weight='semibold')
        ax.text(position, q3, f'Q3: {q3:.2f}', horizontalalignment='center', size='small', color='blue', weight='semibold')

    plt.show()

# Annotated box plot of the `more_than_1_columns` columns for the male rows of the
# full dataset; `legend` and `showmeans` are forwarded to `sns.boxplot` via **kwargs.
gender_boxplot_graph_2(filtered_df(kidney_df,columns=more_than_1_columns ),
                    "male",
                    legend=True,
                    showmeans=True
                    )