In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
# Load the cleaned dataset once; the cells below read from this frame.
heart_csv_path = 'dataset/Heart_clean.csv'
df = pd.read_csv(heart_csv_path)

# Print column names, non-null counts and dtypes as a quick schema check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 293 entries, 0 to 292
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   age                      293 non-null    int64  
 1   sex                      293 non-null    object 
 2   chest_pain_type          293 non-null    object 
 3   resting_blood_pressure   293 non-null    int64  
 4   cholestrol               293 non-null    int64  
 5   fasting_blood_sugar      293 non-null    object 
 6   resting_ecg              293 non-null    object 
 7   max_heart_rate           293 non-null    int64  
 8   exercise_induced_angina  293 non-null    object 
 9   depression_level         293 non-null    float64
 10  peak_exercise_st_slope   293 non-null    object 
 11  number_of_major_vessels  293 non-null    int64  
 12  thalassemia_disorder     293 non-null    object 
 13  target                   293 non-null    object 
dtypes: float64(1), int64(5), object(8)

In [3]:
def get_bar_chart(df, x, y, barmode=None, color=None, category_orders=None, labels=None, title=None):
    """Display a bar chart of row counts per category of ``x``.

    The values of ``x`` are cast to ``str`` so they are treated as discrete
    categories. Rows are then grouped by ``x`` (and by ``color`` when it is
    given) and the non-null entries of ``y`` are counted per group; those
    counts are the bar heights.

    Args:
        df (Pandas DataFrame): Preprocessed data from raw sources. Not modified.
        x (str): Column name in the DataFrame used for the x-axis categories.
        y (str): Column name in the DataFrame whose non-null values are counted
            per group.
        barmode (str): Choose barmode from 'group', 'stack' or 'relative'.
            When omitted, Plotly Express' own default ('relative') applies.
        color (str): Optional column name in the DataFrame used to split the
            bars and assign colors to categories.
        category_orders (dict): To set order of categorical values in axes,
            legends or facets. Because ``x`` is cast to ``str``, the entries
            listed for ``x`` must be strings.
        labels (dict): To override/rename axis titles, legend entries or hovers
        title (str): The title of figure

    Returns:
        None: The figure is rendered with ``fig.show()``; nothing is returned.
    """
    # Only group by `color` when one was supplied; a None key makes groupby fail.
    group_keys = [x] if color is None else [x, color]

    # Copy first so the caller's frame keeps its original dtype for `x`.
    df_bar = df.copy()
    df_bar[x] = df_bar[x].astype(str)
    df_bar = df_bar.groupby(group_keys, as_index=False).agg({y: 'count'})

    bar_kwargs = dict(x=x, y=y, color=color,
                      category_orders=category_orders,
                      labels=labels,
                      title=title)
    # Forward barmode only when set, so None does not override px.bar's default.
    if barmode is not None:
        bar_kwargs['barmode'] = barmode

    fig_bar = px.bar(df_bar, **bar_kwargs)
    fig_bar.update_layout(showlegend=True, width=600, height=400)
    fig_bar.update_traces(marker_line_width=0, opacity=0.95)

    return fig_bar.show()


In [4]:
def get_pie_chart(df, values, names, category_orders=None, labels=None, title=None):
    """Display a pie chart of how often each category of ``names`` occurs.

    The slice sizes are the row counts per distinct value of ``df[names]``.
    ``values`` is only the name given to that count column (and therefore the
    key to use in ``labels``); it does not have to be a column of ``df``.

    Args:
        df (Pandas DataFrame): Preprocessed data from raw sources. Not modified.
        values (str): Name for the computed count column, e.g. 'count'.
        names (str): Column name in the DataFrame whose categories become slices
        category_orders (dict): To set order of categorical values in axes, legends or facets
        labels (dict): To override/rename axis titles, legend entries or hovers
        title (str): The title of figure

    Returns:
        None: The figure is rendered with ``fig.show()``; nothing is returned.
    """
    # Count rows per category. Assigning the column names explicitly works
    # whatever names value_counts()/reset_index() produce by default.
    df_pie_count = df[names].value_counts().reset_index()
    df_pie_count.columns = [names, values]

    fig_pie = px.pie(df_pie_count, values=values, names=names,
                     category_orders=category_orders,
                     labels=labels,
                     title=title)
    fig_pie.update_layout(showlegend=True, width=600, height=400)
    fig_pie.update_traces(textposition='inside', textinfo='percent+label')

    return fig_pie.show()

In [5]:
def get_histogram(df, column, bins_range, bin_width, x_label='x', y_label='y', title=None):
    """Display a histogram of ``df[column]`` drawn as a bar chart.

    Bin edges run from ``start`` in steps of ``bin_width`` until they reach or
    pass ``end``, so the whole ``(start, end)`` range is covered, including a
    final bin that ends at (or just beyond) ``end``. Counts are computed with
    ``np.histogram`` and each bar is placed at the centre of its bin.

    Args:
        df (Pandas DataFrame): Preprocessed data from raw sources
        column (str): Column name in the DataFrame to be used for the histogram
        bins_range (tuple): A tuple containing the range of the bins (start, end)
        bin_width (int): The width of the bins. Must be positive.
        x_label (str): The label for the x-axis. Defaults to 'x'
        y_label (str): The label for the y-axis. Defaults to 'y'
        title (str): The title of the figure. Defaults to None

    Returns:
        None: The figure is rendered with ``fig.show()``; nothing is returned.

    Raises:
        ValueError: If ``bin_width`` is not positive or ``end`` is not greater
            than ``start``.
    """
    start, end = bins_range
    if bin_width <= 0:
        raise ValueError("bin_width must be positive")
    if end <= start:
        raise ValueError("bins_range must satisfy start < end")

    # range(start, end, width) would stop one edge short of `end` and silently
    # drop the last bin, so build the edges explicitly with n_bins + 1 points.
    n_bins = int(np.ceil((end - start) / bin_width))
    edges = start + bin_width * np.arange(n_bins + 1)

    counts, edges = np.histogram(df[column], bins=edges)
    centers = 0.5 * (edges[:-1] + edges[1:])

    # With array inputs px names the columns 'x' and 'y'; map those to the
    # requested labels so hover text matches the axis titles.
    fig_hist = px.bar(x=centers, y=counts, labels={'x': x_label, 'y': y_label}, title=title)

    # Update the axis labels
    fig_hist.update_xaxes(title_text=x_label)
    fig_hist.update_yaxes(title_text=y_label)
    fig_hist.update_layout(showlegend=True, width=600, height=400)

    return fig_hist.show()

In [8]:
# Heart-disease outcome counts per gender, drawn as grouped bars.
df_gen_tar = df.copy()
fig = get_bar_chart(
    df_gen_tar,
    x='sex',
    y='age',
    barmode='group',
    color='target',
    category_orders={'sex': ['Female', 'Male']},
    labels={'sex': 'Gender', 'age': 'Count', 'target': 'Target'},
    title='Gender by Heart Disease',
)

In [9]:
# Heart-disease outcome counts per chest pain type, drawn as grouped bars.
df_cpt_tar = df.copy()
fig = get_bar_chart(
    df_cpt_tar,
    x='chest_pain_type',
    y='age',
    barmode='group',
    color='target',
    labels={'chest_pain_type': 'Chest Pain Type', 'age': 'Count', 'target': 'Target'},
    title='Chest Pain Type by Heart Disease',
)

In [10]:
# Heart-disease outcome counts per number of major vessels, drawn as grouped bars.
df_ca_tar = df.copy()

# get_bar_chart casts the x column to str, so the category order has to be
# listed as strings; integer entries would never match the plotted values.
fig = get_bar_chart(df_ca_tar, 'number_of_major_vessels', 'age', color='target', barmode='group',
                    category_orders={'number_of_major_vessels': ['0', '1', '2', '3']},
                    labels={'number_of_major_vessels': 'Number of Major Vessels', 'age': 'Count', 'target': 'Target'},
                    title='Number of Major Vessels by Heart Disease')

In [11]:
# Heart-disease outcome counts per thalassemia disorder category.
df_thal = df.copy()
fig = get_bar_chart(
    df_thal,
    x='thalassemia_disorder',
    y='age',
    color='target',
    category_orders={
        'thalassemia_disorder': ['Normal Blood Flow', 'Fixed Defect', 'Reversible Defect'],
    },
    labels={'thalassemia_disorder': 'Thalassemia Disorder', 'age': 'Count', 'target': 'Target'},
    title='Thalassemia Disorder Distribution',
)

In [12]:
# Share of each gender in the dataset, drawn as a pie chart.
df_gen = df.copy()
fig = get_pie_chart(
    df_gen,
    values='age',
    names='sex',
    labels={'age': 'Count'},
    title='Gender Distribution',
)

In [13]:
# Age distribution in 10-year bins between 20 and 80.
df_age = df.copy()
fig_hist = get_histogram(
    df=df_age,
    column='age',
    bins_range=(20, 80),
    bin_width=10,
    x_label='age',
    y_label='count',
    title='Age Distribution',
)