# Plot functions for Exploratory Data Analysis (EDA)

# In [1]:
import matplotlib.pyplot as plt
import seaborn as sns

# In [2]:
#plot function for data distributions
def histplot(rows, columns, data, grid = "darkgrid", hspace = 0.5, figsize = (15, 12), suptitle = "data distributions"):
    """
    Creates a grid of histograms, one subplot per variable in data.

    Inputs:
        rows: number of subplot rows.
        columns: number of subplot columns.
        data: mapping-like object exposing .keys() and item access by key
            (e.g. a pandas DataFrame or a dict of sequences).
        grid: seaborn style name (default is "darkgrid").
        hspace: vertical spacing between subplot rows (default is 0.5).
        figsize: figure size (default is (15, 12)).
        suptitle: figure title (default is "data distributions").
    Output: None. The figure is displayed with plt.show().

    Variables are placed row by row. If the grid has more cells than
    variables, every unused cell is removed; if it has fewer, only the first
    rows * columns variables are plotted.
    """
    sns.set_theme(style = grid, rc = {"figure.figsize": figsize})
    # squeeze = False keeps axes two-dimensional even for a single row or
    # column, so the grid can always be flattened the same way.
    fig, axes = plt.subplots(rows, columns, squeeze = False)
    plt.subplots_adjust(hspace = hspace)
    cells = axes.flatten()
    variables = list(data.keys())
    for ax, name in zip(cells, variables):
        # kde = True overlays a kernel density estimate, i.e. an estimate of
        # the pdf of a continuous random variable.
        sns.histplot(data[name], ax = ax, kde = True)
    # Drop every surplus cell, not only the last one.
    for ax in cells[len(variables):]:
        fig.delaxes(ax)
    plt.suptitle(suptitle)
    plt.show()

# In [3]:
#plot function for boxplots
def boxplot(rows, columns, data, grid = "darkgrid", hspace = 0.5, figsize = (15, 12), suptitle = "boxplots"):
    """
    Creates a grid of boxplots, one subplot per variable in data.

    Inputs:
        rows: number of subplot rows.
        columns: number of subplot columns.
        data: mapping-like object exposing .keys() and item access by key
            (e.g. a pandas DataFrame or a dict of sequences).
        grid: seaborn style name (default is "darkgrid").
        hspace: vertical spacing between subplot rows (default is 0.5).
        figsize: figure size (default is (15, 12)).
        suptitle: figure title (default is "boxplots").
    Output: None. The figure is displayed with plt.show().

    Variables are placed row by row. If the grid has more cells than
    variables, every unused cell is removed; if it has fewer, only the first
    rows * columns variables are plotted.
    """
    sns.set_theme(style = grid, rc = {"figure.figsize": figsize})
    # squeeze = False keeps axes two-dimensional even for a single row or
    # column, so the grid can always be flattened the same way.
    fig, axes = plt.subplots(rows, columns, squeeze = False)
    plt.subplots_adjust(hspace = hspace)
    cells = axes.flatten()
    variables = list(data.keys())
    for ax, name in zip(cells, variables):
        sns.boxplot(x = data[name], ax = ax)
    # Drop every surplus cell, not only the last one.
    for ax in cells[len(variables):]:
        fig.delaxes(ax)
    plt.suptitle(suptitle)
    plt.show()

# In [4]:
#plot function for violinplot
def violinplot(rows, columns, data, grid = "darkgrid", hspace = 0.5, figsize = (15, 12), suptitle = "violinplots"):
    """
    Creates a grid of violinplots, one subplot per variable in data.

    Inputs:
        rows: number of subplot rows.
        columns: number of subplot columns.
        data: mapping-like object exposing .keys() and item access by key
            (e.g. a pandas DataFrame or a dict of sequences).
        grid: seaborn style name (default is "darkgrid").
        hspace: vertical spacing between subplot rows (default is 0.5).
        figsize: figure size (default is (15, 12)).
        suptitle: figure title (default is "violinplots").
    Output: None. The figure is displayed with plt.show().

    Variables are placed row by row. If the grid has more cells than
    variables, every unused cell is removed; if it has fewer, only the first
    rows * columns variables are plotted.
    """
    sns.set_theme(style = grid, rc = {"figure.figsize": figsize})
    # squeeze = False keeps axes two-dimensional even for a single row or
    # column, so the grid can always be flattened the same way.
    fig, axes = plt.subplots(rows, columns, squeeze = False)
    plt.subplots_adjust(hspace = hspace)
    cells = axes.flatten()
    variables = list(data.keys())
    for ax, name in zip(cells, variables):
        sns.violinplot(x = data[name], ax = ax)
    # Drop every surplus cell, not only the last one.
    for ax in cells[len(variables):]:
        fig.delaxes(ax)
    plt.suptitle(suptitle)
    plt.show()

# In [5]:
#plot function for pairgrid
def pairgrid(data, diag = sns.kdeplot, upper = sns.scatterplot, lower = sns.kdeplot, grid = "darkgrid"):
    """
    Draws a seaborn PairGrid of the pairwise relationships in data.

    Inputs:
        data: dataset handed to sns.PairGrid.
        diag: plotting function for the diagonal cells (default is sns.kdeplot).
        upper: plotting function for the upper triangle (default is sns.scatterplot).
        lower: plotting function for the lower triangle (default is sns.kdeplot).
        grid: seaborn style name (default is "darkgrid").
    Output: None. The figure is displayed with plt.show().
    """
    sns.set_theme(style = grid)
    pair_grid = sns.PairGrid(data)
    # Apply the plotting functions in a fixed order: diagonal, upper, lower.
    region_plots = (
        (pair_grid.map_diag, diag),
        (pair_grid.map_upper, upper),
        (pair_grid.map_lower, lower),
    )
    for apply_to_region, plot_func in region_plots:
        apply_to_region(plot_func)
    plt.show()