# Plot functions for Exploratory Data Analysis (EDA)

In [1]:
#plot function for data distribution of each attribute and the target
def histplot(rows, columns, data, grid="darkgrid", hspace=0.5):
    """
    Draw a grid of histograms (with a KDE overlay), one subplot per variable.

    Variables are taken from ``data.keys()`` in order and placed row by row.
    Grid cells left over after the last variable are removed from the figure.
    If the grid has fewer cells than there are variables, only the first
    ``rows * columns`` variables are drawn.

    Input:
        rows: number of subplot rows.
        columns: number of subplot columns.
        data: collection of variables supporting ``keys()`` and ``data[name]``
            (presumably a pandas DataFrame -- confirm against the caller).
        grid: seaborn style name forwarded to ``sns.set_theme``.
        hspace: vertical spacing between subplot rows, forwarded to
            ``plt.subplots_adjust``.
    Output: None; the figure is shown with ``plt.show()``.
    """
    # NOTE: this updates the global seaborn/matplotlib settings (theme and
    # default figure size), so it also affects figures created afterwards.
    sns.set_theme(style=grid, rc={"figure.figsize": (15, 12)})

    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so the layout logic below works for every grid shape.
    fig, axes = plt.subplots(rows, columns, squeeze=False)
    plt.subplots_adjust(hspace=hspace)

    cells = axes.flatten()  # row-major: left to right, then top to bottom
    names = list(data.keys())

    for name, ax in zip(names, cells):
        # kde=True overlays a kernel density estimate, i.e. an estimate of the
        # probability density function of a continuous random variable.
        sns.histplot(data[name], ax=ax, kde=True)

    # Remove exactly the cells that received no variable, instead of guessing
    # from the parity of the variable count.
    for ax in cells[len(names):]:
        fig.delaxes(ax)

    plt.suptitle("Data distributions of the variables and the target")
    plt.show()

In [2]:
#plot function for boxplot of each attribute and the target
def boxplot(rows, columns, data, grid="darkgrid", hspace=0.5):
    """
    Draw a grid of horizontal boxplots, one subplot per variable.

    Variables are taken from ``data.keys()`` in order and placed row by row.
    Grid cells left over after the last variable are removed from the figure.
    If the grid has fewer cells than there are variables, only the first
    ``rows * columns`` variables are drawn.

    Input:
        rows: number of subplot rows.
        columns: number of subplot columns.
        data: collection of variables supporting ``keys()`` and ``data[name]``
            (presumably a pandas DataFrame -- confirm against the caller).
        grid: seaborn style name forwarded to ``sns.set_theme``.
        hspace: vertical spacing between subplot rows, forwarded to
            ``plt.subplots_adjust``.
    Output: None; the figure is shown with ``plt.show()``.
    """
    # NOTE: this updates the global seaborn/matplotlib settings (theme and
    # default figure size), so it also affects figures created afterwards.
    sns.set_theme(style=grid, rc={"figure.figsize": (15, 12)})

    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so the layout logic below works for every grid shape.
    fig, axes = plt.subplots(rows, columns, squeeze=False)
    plt.subplots_adjust(hspace=hspace)

    cells = axes.flatten()  # row-major: left to right, then top to bottom
    names = list(data.keys())

    for name, ax in zip(names, cells):
        # Passing the values as `x` lays the box out horizontally.
        sns.boxplot(x=data[name], ax=ax)

    # Remove exactly the cells that received no variable, instead of guessing
    # from the parity of the variable count.
    for ax in cells[len(names):]:
        fig.delaxes(ax)

    plt.suptitle("Boxplots of the variables and the target")
    plt.show()

In [3]:
#plot function for violinplot of each attribute and the target
def violinplot(rows, columns, data, grid="darkgrid", hspace=0.5):
    """
    Draw a grid of violinplots (boxplot combined with a KDE), one per variable.

    Variables are taken from ``data.keys()`` in order and placed row by row.
    Grid cells left over after the last variable are removed from the figure.
    If the grid has fewer cells than there are variables, only the first
    ``rows * columns`` variables are drawn.

    Input:
        rows: number of subplot rows.
        columns: number of subplot columns.
        data: collection of variables supporting ``keys()`` and ``data[name]``
            (presumably a pandas DataFrame -- confirm against the caller).
        grid: seaborn style name forwarded to ``sns.set_theme``.
        hspace: vertical spacing between subplot rows, forwarded to
            ``plt.subplots_adjust``.
    Output: None; the figure is shown with ``plt.show()``.
    """
    # NOTE: this updates the global seaborn/matplotlib settings (theme and
    # default figure size), so it also affects figures created afterwards.
    sns.set_theme(style=grid, rc={"figure.figsize": (15, 12)})

    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so the layout logic below works for every grid shape.
    fig, axes = plt.subplots(rows, columns, squeeze=False)
    plt.subplots_adjust(hspace=hspace)

    cells = axes.flatten()  # row-major: left to right, then top to bottom
    names = list(data.keys())

    for name, ax in zip(names, cells):
        # Passing the values as `x` lays the violin out horizontally.
        sns.violinplot(x=data[name], ax=ax)

    # Remove exactly the cells that received no variable, instead of guessing
    # from the parity of the variable count.
    for ax in cells[len(names):]:
        fig.delaxes(ax)

    plt.suptitle("Violinplots of the variables and the target")
    plt.show()