In [None]:
### imports
import numpy, pandas
from matplotlib import pyplot
### displaying matplotlib figures within notebook
%matplotlib inline

## Two Ways of Plotting (x)

#### Stateless plotting (x)

In [None]:
### stateless blank plot: pyplot.plot() is called without any data
pyplot.plot()
### pyplot.show() displays the plot, so the cell shows no matplotlib messages
pyplot.show()

<span style="color:navajowhite">
--- matplotlib syntax<br>
a semicolon (;) after the last plotting line suppresses that line's text output, so pyplot.show() is not needed
</span>

In [None]:
### stateless line plot, only y-axis values are given
y_values = [1, 2, 3, 4]
### trailing semicolon keeps matplotlib messages out of the cell output
pyplot.plot(y_values);

In [None]:
### stateless line plot, y_values (y-axis) over x_values (x-axis)
x_values = [1, 2, 3, 4]
y_values = [11, 22, 33, 44]
pyplot.plot(x_values, y_values);

#### Object oriented plotting (x)

<span style="color:navajowhite">
--- matplotlib concept<br>
figure object represents entire figure with all subplots<br>
axis object(s) represent(s) subplot(s)
</span>

In [None]:
### data for the plot, y_values (y-axis) over x_values (x-axis)
x_values = [1, 2, 3, 4]
y_values = [11, 22, 33, 44]
### object oriented figure (figure) with one subplot (axis)
figure, axis = pyplot.subplots()
### line plot drawn on the "axis" subplot
axis.plot(x_values, y_values);

## Matplotlib Workflow (x)

<span style="color:navajowhite">
--- matplotlib workflow steps<br>
preparing data<br>
creating figure<br>
plotting data<br>
customizing plot<br>
saving figure<br>
</span>

In [None]:
### preparing data: x-axis values and the matching y-axis values
xdata = [1, 2, 3, 4]
ydata = [11, 22, 33, 44]

### creating figure: object oriented figure (figure), one subplot (axis), width=5in x height=5in (figsize)
figure, axis = pyplot.subplots(figsize=(5, 5))

### plotting data: "axis" subplot, line plot, ydata (y-axis) over xdata (x-axis)
axis.plot(xdata, ydata)

### customizing plot: subplot title, x-axis label, y-axis label
axis.set_title("Sample Plot")
axis.set_xlabel("x-axis")
axis.set_ylabel("y-axis")

### saving figure to an image file
figure.savefig("plot-sample.png")

<span style="color:navajowhite">
--- matplotlib syntax<br>
figure.savefig() returns nothing, so a cell ending with it also shows no text output and pyplot.show() is not needed
</span>

## Plotting from NumPy / Plot Types (x)

#### Line plots (x)

In [None]:
### numpy linear array (linspace_array), 100 evenly spaced numbers from 0 to 10
linspace_array = numpy.linspace(0, 10, 100)
### values for the y-axis, linspace_array squared
squared_array = linspace_array ** 2
### object oriented figure (figure) with one subplot (axis)
figure, axis = pyplot.subplots()
### "axis" subplot, line plot, squared_array (y-axis) over linspace_array (x-axis)
axis.plot(linspace_array, squared_array);

#### Scatter plots (x)

In [None]:
### numpy linear array (linspace_array), 100 evenly spaced numbers from 0 to 10
linspace_array = numpy.linspace(0, 10, num=100)
### values for the y-axis, sine of linspace_array
sine_array = numpy.sin(linspace_array)
### object oriented figure (figure) with one subplot (axis)
figure, axis = pyplot.subplots()
### "axis" subplot, scatter plot, sine_array (y) over linspace_array (x)
axis.scatter(linspace_array, sine_array);

#### Histogram plots (x)

<span style="color:navajowhite">
--- histogram concept<br>
histogram plot displays frequency distribution of dataset (how many values fall into each range)<br>
dataset entire span (min value - max value) is divided into equally sized ranges (bins)<br>
heights of histogram columns represent number of values within each bin
</span>

In [None]:
### numpy normally distributed random array (randn_array), 1000 numbers
randn_array = numpy.random.randn(1000)
### object oriented figure (figure) with one subplot (axis)
figure, axis = pyplot.subplots()
### "axis" subplot, histogram plot of randn_array, number of occurrences (y-axis) per bin (x-axis)
axis.hist(randn_array);

#### Bar plots (x)

In [None]:
### dictionary data, product names paired with their prices
butter_prices = dict(zip(
    ["Almond Butter", "Peanut Butter", "Cashew Butter"],
    [10, 8, 12]))
butter_prices

In [None]:
### object oriented figure (figure) with one subplot (axis)
figure, axis = pyplot.subplots()
### "axis" subplot, vertical bar plot, one bar per dictionary key (x), bar height from dictionary values (height)
axis.bar(list(butter_prices), butter_prices.values())
### "axis" subplot, subplot title and y-axis label
axis.set_title("Dan's Nut Butter Store")
axis.set_ylabel("Price ($)");

In [None]:
### object oriented figure (figure) with one subplot (axis)
figure, axis = pyplot.subplots()
### "axis" subplot, horizontal bar plot, one bar per dictionary key (y), bar width from dictionary values (width)
axis.barh(list(butter_prices), butter_prices.values())
### "axis" subplot, subplot title and x-axis label
axis.set_title("Dan's Nut Butter Store")
axis.set_xlabel("Price ($)");

#### Subplots (x)

In [None]:
### object oriented figure (figure) with a 2x2 grid of subplots, width=10in x height=5in (figsize)
### subplots are unpacked by position: row1col1 (axis11), row1col2 (axis12), row2col1 (axis21), row2col2 (axis22)
figure, ((axis11, axis12), (axis21, axis22)) = pyplot.subplots(2, 2, figsize=(10, 5))

### "axis11" subplot, line plot, linspace_array squared (y-axis) over linspace_array (x-axis)
axis11.plot(linspace_array, linspace_array ** 2)

### "axis12" subplot, scatter plot of two numpy random arrays (rand), 10 numbers each
### y values are drawn before x values
random_y = numpy.random.rand(10)
random_x = numpy.random.rand(10)
axis12.scatter(random_x, random_y)

### "axis21" subplot, histogram plot of randn_array, number of occurrences (y-axis) per bin (x-axis)
axis21.hist(randn_array)

### "axis22" subplot, vertical bar plot, dictionary values (height) over dictionary keys (x)
axis22.bar(list(butter_prices), butter_prices.values());

In [None]:
### subplot option 2: all subplots are kept in one array (axes) and selected by [row, column] index
figure, axes = pyplot.subplots(2, 2, figsize=(10, 5))
### row 1, column 1: line plot, linspace_array squared (y-axis) over linspace_array (x-axis)
axes[0, 0].plot(linspace_array, linspace_array ** 2)
### row 1, column 2: scatter plot of two numpy random arrays, 10 numbers each, y drawn before x
random_y = numpy.random.rand(10)
random_x = numpy.random.rand(10)
axes[0, 1].scatter(random_x, random_y)
### row 2, column 1: histogram plot of randn_array
axes[1, 0].hist(randn_array)
### row 2, column 2: vertical bar plot, dictionary values (height) over dictionary keys (x)
axes[1, 1].bar(list(butter_prices), butter_prices.values());

## Plotting from Pandas (x)

#### Plotting from random series (x)

In [None]:
### numpy normally distributed random array (randn_array), 1000 numbers
randn_array = numpy.random.randn(1000)
### pandas datetime index (date_index), 1000 consecutive dates (periods) beginning at 1/1/2020
date_index = pandas.date_range("1/1/2020", periods=1000)
### pandas series (randn_series), randn_array numbers indexed by date_index
randn_series = pandas.Series(randn_array, index=date_index)
### pandas series (cumsum_series), running total of randn_series
cumsum_series = randn_series.cumsum()
### pandas stateless line plot, series values (y-axis) over series index (x-axis)
cumsum_series.plot();

#### Plotting from car sales dataframe (x)

In [None]:
### pandas dataframe (carsales_df), loaded from the car sales csv file
carsales_df = pandas.read_csv(filepath_or_buffer="data-car-sales.csv")
### last expression displays the dataframe
carsales_df

In [None]:
### carsales_df dataframe, "Price" column, converting price text into whole numbers (int)
### the cleanup only runs while the column is not yet integer, so re-running this cell does not fail
if not pandas.api.types.is_integer_dtype(carsales_df["Price"]):
    ### removing "$", "," and "." characters using regex
    ### raw string (r"...") keeps the backslashes without invalid escape sequence warnings
    price_digits = carsales_df["Price"].str.replace(r"[\$\,\.]", "", regex=True)
    ### removing decimal characters, the last two digits left over after "." was removed
    price_digits = price_digits.str[:-2]
    ### converting datatype str > int
    carsales_df["Price"] = price_digits.astype(int)
carsales_df

In [None]:
### pandas datetime index (date_index), contains series of dates (date_range)
### begins at 1/1/2020 (start), one date per carsales_df row (periods)
### periods follows the dataframe length, so the assignment below also works when the csv row count changes
date_index = pandas.date_range(start="1/1/2020", periods=len(carsales_df))
### carsales_df dataframe, creating "Sale Date" column, contains date_index
carsales_df["Sale Date"] = date_index
carsales_df

In [None]:
### running total of the "Price" column (cumsum)
running_total = carsales_df["Price"].cumsum()
### carsales_df dataframe, creating "Total Sales" column from the running total
carsales_df["Total Sales"] = running_total
carsales_df

In [None]:
### pandas stateless line plot, "Sale Date" column on x-axis, "Total Sales" column on y-axis
carsales_df.plot(x="Sale Date", y="Total Sales");

In [None]:
### pandas stateless scatter plot (kind), "Odometer (KM)" column on x-axis, "Price" column on y-axis
carsales_df.plot(kind="scatter", x="Odometer (KM)", y="Price");

In [None]:
### pandas stateless vertical bar plot (kind), "Make" column on x-axis, "Odometer (KM)" column on y-axis
carsales_df.plot(kind="bar", x="Make", y="Odometer (KM)");

In [None]:
### pandas stateless histogram plot (kind) of the "Odometer (KM)" column
### number of occurrences (y-axis) per bin (x-axis), ten histogram columns (bins)
carsales_df["Odometer (KM)"].plot(kind="hist", bins=10);

#### Plotting from heart disease dataframe - Stateless (x)

<span style="color:navajowhite">
--- statistics concept<br>
as a common rule of thumb, a value is treated as an outlier when it lies more than 3 standard deviations away from the mean
</span>

In [None]:
### pandas dataframe (heart_disease), loaded from the heart disease csv file
heart_disease = pandas.read_csv(filepath_or_buffer="data-heart-disease.csv")
### last expression displays the dataframe
heart_disease

In [None]:
### pandas stateless histogram plot (kind) of the "age" column
### number of occurrences (y-axis) per bin (x-axis), ten histogram columns (bins)
heart_disease["age"].plot(kind="hist", bins=10);

In [None]:
### pandas stateless figure, separate histogram subplots for the dataframe columns (subplots=True)
### number of occurrences (y-axis) per bin (x-axis), width=10in x height=15in (figsize)
heart_disease.plot(kind="hist", subplots=True, figsize=(10, 15));

#### Plotting from heart disease dataframe - Object oriented (x)

<span style="color:navajowhite">
--- matplotlib concept<br>
object oriented plotting should be used for more advanced figures
</span>

In [None]:
### boolean mask, True for heart_disease rows whose "age" column value is above 50
is_over_fifty = heart_disease["age"] > 50
### pandas dataframe (over_fifty), heart_disease rows selected by the mask
over_fifty = heart_disease.loc[is_over_fifty]
### pandas stateless scatter plot (kind), "age" column (x), "chol" column (y), colored by "target" column (c)
over_fifty.plot(kind="scatter", x="age", y="chol", c="target");

In [None]:
### object oriented figure (figure) with one subplot (axis), width=10in x height=6in (figsize)
figure, axis = pyplot.subplots(figsize=(10, 6))

### scatter plot on the "axis" subplot, returned plot object is kept (chol_age) for building the legend
### "age" column (x), "chol" column (y), colored by "target" column (c)
chol_age = axis.scatter(over_fifty["age"], over_fifty["chol"], c=over_fifty["target"])

### "axis" subplot, dashed (linestyle) horizontal line (axhline) at mean of "chol" column
mean_chol = over_fifty["chol"].mean()
axis.axhline(mean_chol, linestyle="--")

### "axis" subplot, x-axis range from 45 to 85 (set_xlim)
axis.set_xlim(45, 85)

### "axis" subplot, subplot title, x-axis label, y-axis label
axis.set_title("Cholesterol and Heart Disease")
axis.set_xlabel("Age")
axis.set_ylabel("Cholesterol")

### "axis" subplot, category legend
### legend handles and labels come from the plot object (legend_elements), legend title (title)
legend_handles, legend_labels = chol_age.legend_elements()
axis.legend(legend_handles, legend_labels, title="Target");

#### Plotting from heart disease dataframe - Subplots (x)

In [None]:
### object oriented figure (figure), two subplots, row1col1 (axis11), row2col1 (axis21)
### two grid rows (nrows), one grid column (ncols), shared x-axis (sharex), width=10in x height=10in (figsize)
figure, (axis11, axis21) = pyplot.subplots(nrows=2, ncols=1, sharex=True, figsize=(10,10))

### pyplot plot object (chol_age), contains axis11 subplot
### scatter plot, over_fifty/chol (y) over over_fifty/age (x), categories by over_fifty/target (c)
chol_age = axis11.scatter(y=over_fifty["chol"], x=over_fifty["age"], c=over_fifty["target"])
### axis11 subplot, horizontal line (axhline) at mean of over_fifty/chol (y), dashed line (linestyle)
axis11.axhline(y=over_fifty["chol"].mean(), linestyle="--")
### pyplot plot object (thalach_age), contains axis21 subplot
### scatter plot, over_fifty/thalach (y) over over_fifty/age (x), categories by over_fifty/target (c)
thalach_age = axis21.scatter(y=over_fifty["thalach"], x=over_fifty["age"], c=over_fifty["target"])
### axis21 subplot, horizontal line (axhline) at mean of over_fifty/thalach (y), dashed line (linestyle)
axis21.axhline(y=over_fifty["thalach"].mean(), linestyle="--")

### all subplots, setting x-axis range (set_xlim)
### the x-axis is shared (sharex=True), so setting it on axis21 also applies to axis11
axis21.set_xlim([45,85])

### figure title (suptitle) text (t) and typography (fontsize, fontweight)
figure.suptitle(t="Heart Disease Analysis", fontsize=16, fontweight="bold")
### axis11 subplot, category legend
### categories (legend_elements) taken from plot object (*chol_age), setting legend title (title)
axis11.legend(*chol_age.legend_elements(), title="Target")
### axis11 subplot, setting labels, subplot title (title), y-axis legend (ylabel)
axis11.set(title="Cholesterol and Heart Disease", ylabel="Cholesterol")
### axis21 subplot, category legend
### categories (legend_elements) taken from plot object (*thalach_age), setting legend title (title)
axis21.legend(*thalach_age.legend_elements(), title="Target")
### axis21 subplot, setting labels, subplot title (title), y-axis legend (ylabel), x-axis legend (xlabel)
### the semicolon belongs on this last line, it keeps the returned objects out of the cell output
axis21.set(title="Max Heart Rate and Heart Disease", ylabel="Max Heart Rate", xlabel="Age");

## Customizing Plots / Styles / Colormaps (x)

#### Applying style (x)

<span style="color:navajowhite">
--- matplotlib styles<br>
predefined styling elements used for defining appearance of entire plots<br>
see <a href=https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html>matplotlib styles</a>
</span>

In [None]:
### names of the styles available in the installed matplotlib library
### any of these names can be passed to pyplot.style.use()
pyplot.style.available

In [None]:
### numpy normally distributed random matrix (plot_array), 10 rows, 4 columns
plot_array = numpy.random.randn(10, 4)
### pandas dataframe (plot_df), contains plot_array, columns named "a", "b", "c", "d"
plot_df = pandas.DataFrame(plot_array, columns=list("abcd"))

### changing plot style to "ggplot" before the plot is created
pyplot.style.use("ggplot")

### pandas stateless bar plot (kind), plot_df values (y-axis) over plot_df index (x-axis), one bar color per column
### the returned subplot (axis) is kept for customizing the plot below
axis = plot_df.plot(kind="bar")

### toggling plot category legend (set_visible)
axis.legend().set_visible(True)

### plot title, x-axis label, y-axis label
axis.set_title("Random Number Bar Graph")
axis.set_xlabel("Index")
axis.set_ylabel("Random Number");

#### Applying colormap (>>>)

<span style="color:navajowhite">
--- matplotlib colormaps<br>
predefined color schemes used for coloring various plot elements<br>
see <a href=https://matplotlib.org/stable/tutorials/colors/colormaps.html>matplotlib colormap documentation</a>
</span>

In [None]:
### preparing data -----------------------------------------------------------------------------------------------------

### pandas dataframe (heart_disease), reading data from csv file
heart_disease = pandas.read_csv("data-heart-disease.csv")
### pandas dataframe (over_fifty), contains heart_disease dataframe on condition 50 < heart_disease/age value
over_fifty = heart_disease.loc[50 < heart_disease["age"]]

### applying style -----------------------------------------------------------------------------------------------------

### changing plot style, done before the figure is created so that the style is applied to it
### newer matplotlib versions name this style "seaborn-v0_8-whitegrid", older versions "seaborn-whitegrid"
whitegrid_style = "seaborn-v0_8-whitegrid"
if whitegrid_style not in pyplot.style.available:
    whitegrid_style = "seaborn-whitegrid"
pyplot.style.use(whitegrid_style)

### creating figure ----------------------------------------------------------------------------------------------------

### object oriented figure (figure), 2 subplots, row1col1 (axis11), row2col1 (axis21)
### 2 grid rows (nrows), 1 grid column (ncols), shared x-axis (sharex), width=10in x height=10in (figsize)
figure, (axis11, axis21) = pyplot.subplots(nrows=2, ncols=1, sharex=True, figsize=(10,10))

### plotting -----------------------------------------------------------------------------------------------------------

### pyplot plot object (chol_age), contains axis11 subplot, scatter plot
### over_fifty/chol (y) over over_fifty/age (x), categories by over_fifty/target (c), color map summer (cmap)
chol_age = axis11.scatter(y=over_fifty["chol"], x=over_fifty["age"], c=over_fifty["target"], cmap="summer")
### axis11 subplot, horizontal line (axhline) at mean of over_fifty/chol (y), dashed line (linestyle)
axis11.axhline(y=over_fifty["chol"].mean(), linestyle="--")

### pyplot plot object (thalach_age), contains axis21 subplot, scatter plot
### over_fifty/thalach (y) over over_fifty/age (x), categories by over_fifty/target (c), color map winter (cmap)
thalach_age = axis21.scatter(y=over_fifty["thalach"], x=over_fifty["age"], c=over_fifty["target"], cmap="winter")
### axis21 subplot, horizontal line (axhline) at mean of over_fifty/thalach (y), dashed line (linestyle)
axis21.axhline(y=over_fifty["thalach"].mean(), linestyle="--")

### customizing --------------------------------------------------------------------------------------------------------

### x-axis range 50-85, set on axis11 and shared with axis21 (sharex=True)
axis11.set_xlim([50,85])
### axis21 y-axis range 60-200
axis21.set_ylim([60,200])

### figure title text and typography
figure.suptitle(t="Heart Disease Analysis", fontsize=16, fontweight="bold")
### axis11 title, y-axis legend (ylabel)
axis11.set(title="Cholesterol and Heart Disease", ylabel="Cholesterol")
### axis11 category legend, legend title
axis11.legend(*chol_age.legend_elements(), title="Target")
### axis21 title, y-axis legend (ylabel), x-axis legend (xlabel)
axis21.set(title="Max Heart Rate and Heart Disease", ylabel="Max Heart Rate", xlabel="Age")
### axis21 category legend, legend title
axis21.legend(*thalach_age.legend_elements(), title="Target")

### saving and showing figure
figure.savefig("heart-disease-analysis.png")