# Matplotlib

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline

# Task 1 Line plot

You have lists of `years` and `population`, complete the following tasks:
* create a line plot;
* set the labels for the x and y axes;
* set the title for the plot;
* show it.

> Please, note: you can get more information about line plot [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html).

In [None]:
# Task 1: population per year drawn as a line plot.
years = [2015, 2016, 2017, 2018, 2019]
population = [100, 120, 150, 180, 200]

# Keep both series in one frame so the x and y columns stay aligned.
df = pd.DataFrame({
    "years": years,
    "population": population,
})

plt.plot(df["years"], df["population"])
plt.title("Population chart (2015 to 2019)")
plt.xlabel("years")
plt.ylabel("population")
# The years are plain integers on a continuous axis, so the automatic tick
# locator can place fractional ticks (e.g. 2015.5) between them. Pin exactly
# one tick per year instead.
plt.xticks(df["years"])
plt.show()

# Task 2 Scatter plot

You have random data for `x` and `y`. Also, you have random `colors` and `sizes` for each point.

Complete the following tasks:
* create a scatter plot;
* color each point using `colors`;
* size each point using `sizes`;
* set the title and labels for x and y axis;
* set the colorbar label;
* show it.

> Please, note: you can get more information about scatter plot [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html).

In [None]:
# Task 2: scatter plot whose marker color and marker area vary per point.
# NOTE(review): no random seed is set in this cell, so the figure differs on
# every run — seed the generator in a config cell if reproducibility matters.
x = np.random.rand(50)
y = np.random.rand(50)
colors = np.random.rand(50)
sizes = 100 * np.random.rand(50)

# `c` maps each value in `colors` through the colormap; `s` is the marker size.
plt.scatter(x, y, c=colors, s=sizes)
plt.title("Scatter with random colors and size circles")
plt.xlabel("X axis")
plt.ylabel("Y axis")
# The task asks for a labelled colorbar; the bar encodes the values in `colors`.
plt.colorbar(label="Color value")
plt.show()

# Task 3 Bar plot

You have lists of `categories` and `sales`, complete the following tasks:
* create a bar plot;
* set the title and labels for x and y axis;
* rotate the x ticks by 45 degrees;
* show it.

> Please, note: you can get more information about bar plot [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.bar.html).

In [None]:
# Task 3: sales per category drawn as a bar chart.
categories = ["Category A", "Category B", "Category C", "Category D"]
sales = [350, 480, 210, 590]

fig, ax = plt.subplots()
ax.bar(categories, sales)
ax.set(
    title="Displaying sales by category",
    xlabel="categories",
    ylabel="sales",
)
# Tilt the category names by 45 degrees.
ax.tick_params(axis="x", labelrotation=45)
plt.show()

# Task 4 Histogram

You have randomly generated data, complete the following tasks:

* create a histogram;
* set the title and labels for x and y axis;
* make the bins narrower than the default (use more bins);
* show it.

> Please, note: you can get more information about histogram [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html).

In [None]:
# Task 4: histogram of 1000 draws from a normal distribution (mean 0, std 1).
data = np.random.normal(loc=0, scale=1, size=1000)

fig, ax = plt.subplots()
# 30 bins instead of the default bin count gives narrower bars.
ax.hist(data, bins=30)
ax.set(
    title="Frequency by values range (random numbers)",
    xlabel="random numbers",
    ylabel="frequency",
)
plt.show()

# Task 5 Pie plot

You have lists of `labels` and `market_share`, complete the following tasks:
* create a pie plot;
* set the title;
* make numbers in format like `10.0%`;
* set the start angle for better view;
* show it.

> Please, note: you can get more information about pie plot [here](https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_features.html).

In [None]:
# Task 5: market share per product drawn as a pie chart.
labels = ["Product A", "Product B", "Product C", "Product D"]
market_share = [30, 20, 15, 35]

fig, ax = plt.subplots()
ax.pie(
    market_share,
    labels=labels,
    autopct="%1.1f%%",  # print each wedge's share with one decimal, e.g. 10.0%
    startangle=40,      # rotate the start of the first wedge by 40 degrees
)
ax.set_title("Pie plot of Products")
plt.show()

# Task 6 Subplots

You have sin and cos functions, complete the following tasks:

* create a figure with 2 subplots;
* plot sin function in the first subplot;
* plot cos function in the second subplot;
* set the title and labels for x and y axis for each subplot;
* set the title for the figure;
* set different colors for each plot;
* show it horizontally.

> Please, note: you can get more information about subplots [here](https://matplotlib.org/stable/gallery/subplots_axes_and_figures/subplots_demo.html).

In [None]:
# Task 6: sin(x) and cos(x) drawn side by side in one figure.
x = np.linspace(0, 5, 100)
y1 = np.sin(x)
y2 = np.cos(x)

# One row, two columns -> the panels sit next to each other horizontally.
fig, axs = plt.subplots(nrows=1, ncols=2)
fig.suptitle("Sin & Cos of number x")

# (curve values, line color, panel title, y-axis label) for each panel.
panels = [
    (y1, "tab:red", "sin of x", "sin(x)"),
    (y2, "tab:green", "cos of x", "cos(x)"),
]
for ax, (curve, color, panel_title, y_label) in zip(axs, panels):
    ax.plot(x, curve, color)
    ax.set_title(panel_title)
    ax.set_ylabel(y_label)
    ax.set_xlabel("number x")

# Re-pack the panels so titles and axis labels do not overlap.
fig.tight_layout()
plt.show()

# Task 7 Time Series

You have prepared data for time series, complete the following tasks:

* create a line plot;
* set the title and labels for x and y axis;
* rotate x ticks by 45 degrees;
* show it.

> Please, note: you can get more information about time series [here](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html).

In [None]:
# Task 7: a daily series covering 2023, drawn as a line plot.
dates = pd.date_range(start="2023-01-01", end="2023-12-31", freq="D")
# Each value is the zero-based position of its date within the range.
values = pd.Series(range(len(dates)), index=dates)

fig, ax = plt.subplots()
ax.plot(values)
ax.set(
    title="Time Series",
    xlabel="dates",
    ylabel="number of days",
)
# Tilt the date labels by 45 degrees.
ax.tick_params(axis="x", labelrotation=45)
plt.show()

# (Optional) Task 8 Surface 3D Plot

You have prepared data for 3D surface plot, complete the following tasks:
* create a figure;
* add a subplot for 3D surface plot;
* plot the surface, `cmap` can be `viridis`;
* set the title and labels for x, y and z axis;
* show it.

> Please, note: you can get more information about 3D surface plot [here](https://matplotlib.org/stable/gallery/mplot3d/surface3d.html).

In [None]:
# Task 8: 3D surface of the paraboloid Z = X^2 + Y^2 on a 100 x 100 grid.
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = X**2 + Y**2

# Ask for a 3D projection so the Axes exposes plot_surface / set_zlabel.
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})

ax.plot_surface(X, Y, Z, cmap="viridis")
ax.set_title("Surface 3D Plot")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
# Use the real z-axis label rather than free-floating text at hard-coded data
# coordinates: the label then follows the axis when the view or data change.
ax.set_zlabel("Z axis")
# NOTE(review): in some Matplotlib versions the default 3D layout can clip the
# z label at the figure edge; zooming out slightly leaves room for it.
ax.set_box_aspect(None, zoom=0.85)

plt.show()

# (Optional) Task 9

You are working for a retail company that wants to understand its customer base better. The company has collected various demographic and purchasing behavior data for a sample of customers. Your task is to create a scatter plot matrix to visualize the relationships between different variables and identify potential customer segments based on their characteristics. The scatter plot matrix will allow you to observe the pairwise relationships between multiple variables simultaneously.

> Please, note: you can find the file with data in the `data` folder.

In [15]:
# Load the customer table; the CustomerID column becomes the row index.
df = pd.read_csv(
    "../data/customer_data.csv",
    index_col="CustomerID",
)

In [16]:
# Size of the loaded table as (rows, columns).
df.shape

(200, 4)

In [17]:
# Preview the first rows to check the columns and the CustomerID index.
df.head()

Unnamed: 0_level_0,Gender,Age,Annual Income,Spending Score
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Male,19,15,39
2,Male,21,15,81
3,Female,20,16,6
4,Female,23,16,77
5,Female,31,17,40


In [22]:
# Column dtypes and non-null counts, to confirm there are no missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, 1 to 200
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Gender          200 non-null    object
 1   Age             200 non-null    int64 
 2   Annual Income   200 non-null    int64 
 3   Spending Score  200 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 7.8+ KB


In [21]:
import seaborn as sns

In [None]:
# Scatter plot matrix of the customer table, colored by Gender.
gender_grid = sns.pairplot(data=df, hue="Gender")
# y slightly above 1 lifts the title clear of the top row of panels.
gender_grid.figure.suptitle("Pairplot by Gender", y=1.02)
plt.show()

In [None]:
# Scatter plot matrix colored by age group (Youth / Adult / Senior).
# With pd.cut's default right-closed intervals the edges below give
# (17, 25] -> Youth, (25, 40] -> Adult, (40, 100] -> Senior.
bins = [17, 25, 40, 100]
labels = ["Youth", "Adult", "Senior"]
# Adds the AgeGroup column to `df` itself so it can be used as the hue.
df["AgeGroup"] = pd.cut(
    x=df["Age"],
    bins=bins,
    labels=labels,
)

age_grid = sns.pairplot(data=df, hue="AgeGroup")
# y slightly above 1 lifts the title clear of the top row of panels.
age_grid.figure.suptitle("Pairplot by Age Groups", y=1.02)
plt.show()