# Data Visualization (Matplotlib)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

### Dataset: Heart Disease

In [None]:
# Load the heart-disease dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv("heart-disease.csv")

# Preview the first five rows (head()'s default) as the cell output.
df.head(n=5)

## Figures and Axes

In [None]:
# plt.subplots() creates a Figure together with its Axes and returns both,
# unpacked here into `fig` and `ax`. A 1x1 grid (a single Axes) is the default;
# it is spelled out explicitly for clarity.
fig, ax = plt.subplots(nrows=1, ncols=1)

---

# Line Plot

In [None]:
# Only y-values are passed to plot(), so the x axis defaults to the
# index of the Series (i.e. the row index of df).

fig, ax = plt.subplots()

ax.plot(df["max_hr"]);

## Set properties

In [None]:
fig, ax = plt.subplots()

ax.plot(df["max_hr"])

# Convenient form: ax.set() applies several Axes properties in one call.
ax.set(
    title="Line Plot",
    xlabel="Row Index",
    ylabel="Max HR",
);

In [None]:
fig, ax = plt.subplots()

ax.plot(df["max_hr"])

# Flexible form: one setter per property. More verbose than ax.set(), but
# each call can take its own additional text-styling arguments.
ax.set_title("Line Plot")
ax.set_xlabel("Row Index")
ax.set_ylabel("Max HR");

## Create multiple plots

### Separate rows

In [None]:
# plt.subplots(nrows, ncols) arranges several Axes on a grid in one Figure.
# Two rows with the default single column: the Axes are returned as an
# array and addressed by position (0 = top, 1 = bottom).
fig, axes = plt.subplots(nrows=2)

axes[0].plot(df["max_hr"])
axes[1].plot(df["age"]);

In [None]:
# Same 2-row layout as plt.subplots(nrows, ncols) with nrows=2, but the
# returned Axes are unpacked straight into named variables instead of
# being indexed.
fig, (top, bot) = plt.subplots(nrows=2)

top.plot(df["max_hr"])
bot.plot(df["age"]);

### Separate columns

In [None]:
# plt.subplots(nrows, ncols): one row and two columns places the two Axes
# side by side.
fig, (left, right) = plt.subplots(nrows=1, ncols=2)

left.plot(df["max_hr"])
right.plot(df["age"]);

## Set the figsize

In [None]:
# figsize=(width, height) sets the size of the whole Figure, which is
# shared by both side-by-side Axes.
fig, (left, right) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

left.plot(df["max_hr"])
right.plot(df["age"]);

---

# Histogram

In [None]:
# Single-letter codes for the basic built-in colors:
#   b = blue      g = green     r = red       c = cyan
#   m = magenta   y = yellow    k = black     w = white

fig, ax = plt.subplots()

# Green bars ("g") with a black outline on each bar.
ax.hist(df["age"], color="g", edgecolor="black")

ax.set(title="Histogram", xlabel="Age");


### Set the number of bins to display

In [None]:
fig, ax = plt.subplots()

# An integer `bins` sets how many equal-width bins the data range is split into.
ax.hist(df["age"], bins=30, color="g", edgecolor="black")

ax.set(title="Histogram", xlabel="Age");

### Set a custom color

[Named Colors](https://matplotlib.org/stable/gallery/color/named_colors.html)

[HTML Color Codes](https://htmlcolorcodes.com)

In [None]:
fig, ax = plt.subplots()

# Besides named colors, a hex RGB string can be used for the bar color.
ax.hist(df["age"], edgecolor="black", color="#9033FF")

ax.set_xlabel("Age")
ax.set_title("Histogram");

### Format the labels (increase font size)

In [None]:
fig, ax = plt.subplots()

ax.hist(df["age"], edgecolor="black", color="#9616FA")

# The individual setters accept text properties such as fontsize, which is
# used here to enlarge both the x label and the title.
ax.set_xlabel("Age", fontsize=16)
ax.set_title("Histogram", fontsize=16);

---

# Boxplot

In [None]:
# A compact 5x3 figure is enough for a single box.
fig, ax = plt.subplots(figsize=(5, 3))

# Draw one box summarising the distribution of the age column.
ax.boxplot(df["age"]);

## Outliers
Outliers are values that lie more than 1.5 × IQR below the 1st quartile or above the 3rd quartile; they are drawn as individual points beyond the whiskers.

In [None]:
# Same compact 5x3 figure, this time for the rest_bp column.
fig, ax = plt.subplots(figsize=(5, 3))

# Points drawn beyond the whiskers are the values flagged as outliers.
ax.boxplot(df["rest_bp"]);

## Create paired boxplots

### Using Ages for Data

In [None]:
# Split the age column into one Series per value of the sex column so the
# two groups can be plotted next to each other.
female_ages = df.loc[df["sex"].eq("female"), "age"]
male_ages = df.loc[df["sex"].eq("male"), "age"]

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))

# One box per Series, drawn left to right in list order. `tick_labels` names
# the x-axis tick under each box. It replaces the `labels` keyword, which
# Matplotlib deprecated in 3.9 (on Matplotlib < 3.9, pass `labels=` instead).
ax.boxplot([female_ages, male_ages], tick_labels=["Female ages", "Male ages"])

ax.set_xlabel("Gender", fontsize=16)
ax.set_ylabel("Age", fontsize=16);

### Set tick labels and modify fontsize

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))

# Draw the boxes first, without naming them.
ax.boxplot([female_ages, male_ages])

ax.set_xlabel("Gender", fontsize=16)
ax.set_ylabel("Age", fontsize=16)

# Naming the ticks afterwards with set_xticklabels() gives more styling
# flexibility: text properties (size, color) can be set in the same call.
ax.set_xticklabels(["Female age", "Male age"], color="darkorange", fontsize=14);

---

# Scatter Plot

In [None]:
fig, ax = plt.subplots()

ax.scatter(
    x=df["age"],      # values along the x axis
    y=df["max_hr"],   # values along the y axis
    alpha=0.3,        # marker transparency
    s=100,            # marker size
    c="black",        # marker color
)

# Title and axis labels, applied in one call.
ax.set(title="Heart Disease", xlabel="Age", ylabel="Max HR");

### Auto-set the colors based on category membership
The column passed to `c` must be numeric (for example, integer class codes) so that each value can be mapped to a color.

In [None]:
fig, ax = plt.subplots()

# Passing a column to `c` colors each marker by that row's value.
# Assumes heart_disease holds numeric class codes -- TODO confirm.
ax.scatter(
    df["age"],
    df["max_hr"],
    c=df["heart_disease"],
    s=200,
    alpha=0.3,
)

ax.set(title="Heart Disease", xlabel="Age", ylabel="Max HR");

### Modify the colormap
[Colormaps](https://matplotlib.org/stable/gallery/color/colormap_reference.html)

In [None]:
fig, ax = plt.subplots()

# `cmap` names the colormap onto which the values passed to `c` are mapped.
ax.scatter(
    df["age"],
    df["max_hr"],
    c=df["heart_disease"],
    cmap="winter",
    s=200,
    alpha=0.3,
)

# Title and axis labels, matching the other scatter plots, so the figure can
# be read on its own.
ax.set(title="Heart Disease", xlabel="Age", ylabel="Max HR");