# Introduction to data visualization with matplotlib

In [2]:
import matplotlib.pyplot as plt # importing matplotlib package
import numpy as np # numerical helpers (e.g. np.median as a seaborn estimator)
import pandas as pd # DataFrame loading and reshaping (read_csv, crosstab)

In [None]:
# Create a figure holding a single Axes to draw on.
fig, ax = plt.subplots()
# Render the figure.
plt.show()

In [None]:
 # plotting a line graph
# Axes.plot takes its x/y data positionally (it has no x=/y= keywords);
# the remaining keywords style the line.
ax.plot(dataframe["column_name"],  # x values
        dataframe["column_name"],  # y values
        marker="o",  # marker type ("o" = circle; the string "0" is not a valid marker)
        linestyle="--",  # line style (dashed)
        color="r")  # color (red)

In [None]:
# Label both axes and give the Axes a title.
ax.set_xlabel("label")  # text for the x-axis
ax.set_ylabel("label")  # text for the y-axis
ax.set_title("title")  # heading of the graph

In [None]:
# small multiples
# 3 rows x 2 columns of Axes; sharey=True gives every panel the same y-axis range
fig, ax = plt.subplots(3, 2, sharey=True)
# index the grid as ax[row, column]; x/y data are passed positionally to plot()
ax[0, 0].plot(dataframe["column_name"], dataframe["column_name"])

In [None]:
# plotting time-series data
# parse the "date" column as datetimes and use it as the index while reading
dataframe = pd.read_csv("file_name", parse_dates=["date"], index_col="date")
# If the frame was loaded without index_col, set the index afterwards instead.
# set_index returns a NEW frame, so the result must be assigned:
#   dataframe = dataframe.set_index("date_time_column")
fig, ax = plt.subplots()
# the datetime index supplies the x values; x/y are passed positionally
ax.plot(dataframe.index, dataframe["column_name"])
plt.show()

In [None]:
# creating plot with twin y axis with annotation
# plt.subplots() returns (figure, axes); plt.subplot() returns a single Axes and cannot be unpacked
fig, ax = plt.subplots()
ax.plot(dataframe["column_name"], dataframe["column_name"], color="blue")
ax.set_xlabel("label")
ax.set_ylabel("label", color="blue")
ax.tick_params("y", colors="blue")  # setting axis tick color
ax2 = ax.twinx()  # second y-axis on the same graph, sharing the x-axis
ax2.plot(dataframe["column_name"], dataframe["column_name"], color="red")
ax2.set_ylabel("label", color="red")
ax2.tick_params("y", colors="red")  # setting axis tick color (Axes has no set_params method)
# xy = point being annotated, xytext = position of the text,
# arrowprops = style of the arrow drawn from the text to the point
ax2.annotate("annotating_text", xy=["x_location", "y_location"], xytext=["x_location", "y_location"], arrowprops={"arrowstyle": "->", "color": "grey"})
plt.show()

In [None]:
# creating a bar chart: Axes.bar(x, height) -- the bar heights are `height`, there is no `y` keyword
ax.bar(dataframe["column_name"], dataframe["column_name"])
# rotating the tick labels by 90 degrees; the labels themselves are the first (required) argument
ax.set_xticklabels(dataframe["column_name"], rotation=90)

In [None]:
# stacked bar chart: each layer starts (bottom=) at the summed heights of the layers drawn before it
ax.bar(dataframe["column_name_1"], dataframe["column_name_2"], label="label")
ax.bar(dataframe["column_name_3"], dataframe["column_name_4"],
       bottom=dataframe["column_name_2"], label="label")
ax.bar(dataframe["column_name_5"], dataframe["column_name_6"],
       bottom=dataframe["column_name_2"] + dataframe["column_name_4"], label="label")
ax.legend()  # show the label of every layer

In [None]:
# one bar per group, its height being the mean of a column
# (the mean is computed on the DataFrame column, not on the column-name string)
ax.bar("x_label", dataframe["column_name"].mean())
ax.bar("x_label", dataframe["column_name"].mean())

In [None]:
# histtype="step" draws only the outline of the histogram (no fill)
ax.hist(dataframe["column_name"], label="label", bins=5, histtype="step")
# a list passed as bins gives the explicit bin edges
ax.hist(dataframe["column_name"], label="label", bins=[5, 10, 15, 20, 25, 30])

In [None]:
# bar of the column mean; yerr draws an error bar of +/- one standard deviation
ax.bar("x_label", dataframe["column_name"].mean(), yerr=dataframe["column_name"].std())

In [None]:
# yerr = size of the vertical error bars; data= lets x, y and yerr be given as column names
ax.errorbar(x="column_name", y="column_name", yerr="column_name", data=dataframe)

In [None]:
# creating a boxplot: Axes.boxplot takes the data as one argument (there is no `y`);
# a list of columns draws one box per column
ax.boxplot([dataframe["column_name"], dataframe["column_name"]])
# one tick label per box, in the same order as the columns above
ax.set_xticklabels(["xlabel", "ylabel"])

In [None]:
# c = column that determines the marker colors; data= lets x, y and c be given as column names
ax.scatter(x="column_name", y="column_name", c="column_name", data=dataframe)

In [None]:
# Switch the plotting style used for subsequent figures.
plt.style.use("style")

In [None]:
# saving the figure on the local drive; dpi = dots per inch (resolution)
fig.savefig("name.png", dpi=300)
# The `quality` keyword was removed from savefig. It only ever applied to JPEG output;
# for a JPEG use: fig.savefig("name.jpg", pil_kwargs={"quality": 50})

In [None]:
# Resize the figure: [width, height] in inches.
fig.set_size_inches([5, 3])

# Introduction to Seaborn 

In [None]:
# importing library
import matplotlib.pyplot as plt 
import seaborn as sns 

In [None]:
# Load one of the example datasets that seaborn provides, by name.
dataframe = sns.load_dataset("name_of_the_dataset")

In [None]:
# Scatter plot whose points are colored by a third, categorical variable.
sns.scatterplot(
    data=dataframe,  # dataframe for plotting
    x="column_name",  # x-axis values
    y="column_name",  # y-axis values
    hue="column_name",  # third variable used to color the points
    hue_order=["yes", "no"],  # order of the hue categories
    palette={"yes": "black", "no": "red"},  # color assigned to each hue category
)

In [None]:
# Count plot: one bar per category of x, its height being the number of rows.
sns.countplot(data=dataframe, x="column_name")

In [None]:
# creating subplots in a single figure
sns.relplot(x="column_name",  # x-axis values
            y="column_name",  # y-axis values
            data=dataframe,  # data frame
            kind="scatter",  # kind of plot
            col="column_name",  # one subplot column per category of a third variable
            col_wrap=2,  # start a new row of subplots after this many columns
            # row="column_name",  # one subplot row per category; cannot be combined with col_wrap
            col_order=["category", "category", "category"],  # order of the col categories
            size="column_name",  # vary the point size by a third variable
            style="smoker",  # vary the point style by a third variable
            alpha=0.4  # transparency
            )

In [None]:
# Line plot via the figure-level relplot interface.
sns.relplot(x="column_name",  # x values
            y="column_name",  # y values
            data=dataframe,  # required so the x/y strings are resolved as column names
            kind="line",  # line plot
            markers=True,  # marker per level of the `style` variable (needs style= to have an effect)
            dashes=False,  # dash pattern per level of the `style` variable (needs style= to have an effect)
            ci="sd"  # band shows the standard deviation instead of a confidence interval
            # NOTE(review): newer seaborn releases spell this errorbar="sd" -- confirm the installed version
            )

In [None]:
# Count plot via the figure-level catplot interface.
sns.catplot(
    data=dataframe,
    x="column_name",  # categorical column to count
    kind="count",  # kind of plot
    order=["category_1", "category_2"],  # order of the categories on the axis
    ci=None,
)

In [None]:
# creating box plot
sns.catplot(x="column_name",
            y="column_name",
            data=dataframe,
            kind="box",
            order=["category_1", "category_2"],  # order of the categories on the axis
            ci=None,  # NOTE(review): box plots draw no confidence interval, so this looks like a no-op -- confirm
            showfliers=False,  # do not show outliers (used instead of sym="", which newer seaborn may reject)
            whis=[0, 100])  # whiskers at the 0th/100th percentiles, i.e. include min and max

In [None]:
# Point plot: one point (plus error bar) per category.
sns.catplot(x="column_name",  # x values
            y="column_name",  # y values
            data=dataframe,  # dataframe
            kind="point",  # kind of plot
            hue="column_name",  # drill down by a third column
            join=False,  # do not connect the points with a line
            estimator=np.median,  # statistic plotted per category (needs `import numpy as np`)
            capsize=0.2)  # width of the caps on the error bars

In [None]:
# Global appearance settings for the plots that follow.
sns.set_style("style")  # plotting style
sns.set_palette("palette")  # color palette
sns.set_context("context")  # plotting context

In [None]:
# FacetGrid
plot = sns.catplot(x="column_name",
                   y="column_name",
                   data=dataframe,
                   kind="box")
plot.fig.suptitle("title", y=1.03)  # title for the whole figure; y > 1 lifts it above the subplots
plot.set_titles("This is {col_name}")  # title template for each subplot in the FacetGrid
plot.set(xlabel="x_label", ylabel="y_label")  # axis labels (the properties are xlabel/ylabel, no underscore)
plt.xticks(rotation=90)  # rotating the x tick labels

In [None]:
# AxesSubplot
# Keep the object returned by sns.boxplot so a title can be set on it.
plot = sns.boxplot(data=dataframe, x="column_name", y="column_name")
# y > 1 places the title slightly above the plot area
plot.set_title("title", y=1.03)

# Intermediate data visualization with seaborn

In [None]:
# Histogram of a single column.
sns.histplot(dataframe["column_name"])

In [None]:
sns.displot(dataframe["column_name"],  # column to plot
            kind="kde",  # kernel density estimation plot
            rug=True,  # rug marks along the x axis
            fill=True)  # fill the area under the curve (Python's boolean is True, not true)
sns.displot(dataframe["column_name"],  # histogram and kernel density curve in one graph
            kde=True,
            bins=10)

In [None]:
# Empirical cumulative distribution of a single column.
sns.displot(dataframe["column_name"], kind="ecdf")

In [None]:
# Low-level regression plot.
sns.regplot(data=dataframe, x="column_name", y="column_name")

In [None]:
# High-level regression plot, split by hue and into one column of subplots per category.
sns.lmplot(
    data=dataframe,
    x="column_name",
    y="column_name",
    hue="column_name",
    col="column_name",
)

In [None]:
# Apply seaborn's default configuration.
sns.set()

In [None]:
# Remove plot spines; left=True removes the left spine as well.
sns.despine(left=True)

In [None]:
# display the current palette (palplot requires the palette to draw as its argument)
sns.palplot(sns.color_palette())
# build a named palette; the second argument is the number of colors (5 is an example value)
sns.color_palette("name_of_the_palette", 5)

In [None]:
# seaborn plots can be customized through matplotlib Axes objects
fig, ax = plt.subplots()
sns.histplot(dataframe["column_name"], ax=ax)  # draw onto the Axes created above
ax.set(xlabel="label", xlim=(0, 50000), title="title")  # xlim is the range of the x-axis

In [None]:
# one row with two Axes sharing the y-axis; plt.subplots (plural) returns (figure, axes)
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(7, 4))

sns.histplot(x="column_name", data=dataframe, ax=ax0)  # plot on the left Axes
sns.histplot(x="column_name", data=dataframe, ax=ax1)  # plot on the right Axes

ax1.set(xlabel="label", xlim=(0, 10000))  # setting x label and x-axis range
# `value` is a placeholder: define it (a number on the x-axis) before running this cell
ax1.axvline(x=value, label="label", linestyle="--")  # creating a vertical line
ax1.legend()

In [None]:
# Categorical data representation with stripplot; jitter is enabled for the points.
sns.stripplot(data=dataframe, x="column_name", y="column_name", jitter=True)

In [None]:
# Categorical data representation with swarmplot.
sns.swarmplot(data=dataframe, x="column_name", y="column_name")

In [None]:
# Categorical data representation with boxplot.
sns.boxplot(data=dataframe, x="column_name", y="column_name")

In [2]:
# Categorical data representation with violinplot.
sns.violinplot(data=dataframe, x="column_name", y="column_name")

In [None]:
# categorical data representation with boxenplot
# (function name is boxenplot; arguments are separated by commas)
sns.boxenplot(x="column_name", y="column_name", data=dataframe)

In [None]:
# Categorical data representation with barplot, split by a hue column.
sns.barplot(data=dataframe, x="column_name", y="column_name", hue="column_name")

In [None]:
# Categorical data representation with pointplot.
sns.pointplot(data=dataframe, x="column_name", y="column_name")

In [None]:
# countplot counts the rows per category, so it takes x OR y, never both
# (use y= instead of x= for horizontal bars)
sns.countplot(x="column_name", data=dataframe, hue="column_name")

In [None]:
# Regression plots with seaborn.
sns.regplot(
    data=dataframe,
    x="column_name",
    y="column_name",
    marker="+",
    order=2,  # order of the polynomial regression
)
# x_jitter, x_estimator and x_bins are further regplot parameters

In [None]:
# Residual plot with seaborn.
sns.residplot(
    data=dataframe,
    x="column_name",
    y="column_name",
    order=2,  # order of the polynomial regression
)

In [None]:
# pandas crosstab() prepares the data for seaborn's heatmap() function;
# it expects the actual columns (array-likes), not column-name strings
heatmap = pd.crosstab(index=dataframe["column_name"],
                      columns=dataframe["column_name"],
                      values=dataframe["column_name"],
                      aggfunc="mean")
# annot=True writes each value into its cell and fmt is the number format of that text
# (".1f" because the aggregated means are floats, which the integer format "d" rejects);
# cmap must name a real colormap -- "YlGnBu" is an example
sns.heatmap(heatmap, annot=True, fmt=".1f", cmap="YlGnBu", cbar=True, linewidths=0.5)

In [None]:
# creating a correlation matrix
columns = ["column_name", "column_name", "column_name"]
# .corr() is a method of the selected sub-frame; cmap must name a real colormap ("YlGnBu" is an example)
sns.heatmap(dataframe[columns].corr(), cmap="YlGnBu")

In [None]:
plot = sns.FacetGrid(dataframe, col="column_name")  # creating faceted plots: one subplot per category
# map() takes the plotting function followed by the column name(s) to plot as positional arguments;
# the faceting column was already given to FacetGrid above
plot.map(sns.boxplot, "column_name", order=["category_1", "category_2"])

# catplot builds the facets and the plot in a single call
sns.catplot(x="column_name", y="column_name", data=dataframe, kind="box")

In [None]:
# lmplot is the one-call, figure-level way to draw a relationship between two columns
sns.lmplot(x="column_name", y="column_name", data=dataframe)

# the same kind of figure built by hand: create the grid, then map a plotting function onto it
plot = sns.FacetGrid(data=dataframe, col="column_name")
# seaborn has no `scatter` function (it is scatterplot); the x and y columns are passed positionally
plot.map(sns.scatterplot, "column_name", "column_name")

In [None]:
# PairGrid shows pairwise relationships between data elements
plot = sns.PairGrid(data=dataframe, vars=["column_name", "column_name"])
plot.map(sns.scatterplot)  # same plot type in every cell of the grid
# placement control
plot = sns.PairGrid(data=dataframe, vars=["column_name", "column_name"])
plot.map_diag(sns.histplot)  # diagonal cells (seaborn's histogram function is histplot)
plot.map_offdiag(sns.scatterplot)  # off-diagonal cells

In [None]:
# pairplot is a shortcut for the PairGrid
# kind (off-diagonal plots) accepts "scatter", "kde", "hist" or "reg" -- "box" is not supported
sns.pairplot(data=dataframe, vars=["column_name", "column_name"], kind="scatter", diag_kind="hist")

In [None]:
# Creating a JointGrid().
plot = sns.JointGrid(data=dataframe, x="column_name", y="column_name")
# first function is drawn on the joint axes, second one on the marginal axes
plot.plot(sns.regplot, sns.histplot)

In [None]:
# advanced JointGrid()
plot = sns.JointGrid(data=dataframe, x="column_name", y="column_name")
plot.plot_joint(sns.kdeplot)  # inner plot
plot.plot_marginals(sns.kdeplot, fill=True)  # outer plots; fill replaces the deprecated shade keyword

In [None]:
# creating jointplot()
sns.jointplot(x="column_name", y="column_name", data=dataframe, kind="hex")

# advanced plotting: jointplot returns a JointGrid, so another layer can be drawn on top of it
# (jointplot needs the data and columns; seaborn's histogram function is histplot)
plot = sns.jointplot(x="column_name", y="column_name", data=dataframe).plot_joint(sns.histplot)