# Matplotlib

In [12]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

# Task 1 Line plot

You have lists of `years` and `population`, complete the following tasks:
* create a line plot;
* set the labels for the x and y axes;
* set the title for the plot;
* show it.

> Please, note: you can get more information about line plot [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html).

In [3]:
years = [2015, 2016, 2017, 2018, 2019]
population = [100, 120, 150, 180, 200]

# Task 1: Line plot
# Draw population against year, with a circular marker on every data point.
plt.figure(figsize=(10, 6))
plt.plot(years, population, marker='o', linewidth=2, markersize=8)
plt.xlabel('Years')
plt.ylabel('Population (thousands)')
plt.title('Population Growth Over Years')
# Pin the x ticks to the actual years: on this integer-valued axis the
# automatic tick locator may otherwise pick fractional positions (e.g. 2015.5).
plt.xticks(years)
plt.grid(True, alpha=0.3)
plt.show()

# Task 2 Scatter plot

You have random data for `x` and `y`. Also, you have random `colors` and `sizes` for each point.

Complete the following tasks:
* create a scatter plot;
* fill the plot with colors;
* fill the plot with sizes;
* set the title and labels for x and y axis;
* set the colorbar label;
* show it.

> Please, note: you can get more information about scatter plot [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html).

In [5]:
# Draw every random input from one seeded generator so the figure is the same
# on each "Restart Kernel -> Run All".
rng = np.random.default_rng(42)
x = rng.random(50)
y = rng.random(50)
colors = rng.random(50)        # one scalar per point, mapped through the colormap
sizes = 100 * rng.random(50)   # marker sizes passed to `s=`

# Task 2: Scatter plot
plt.figure(figsize=(10, 8))
# Keep the returned collection: the colorbar below is built from it.
scatter = plt.scatter(x, y, c=colors, s=sizes, alpha=0.7, cmap='viridis')
plt.xlabel('X Values')
plt.ylabel('Y Values')
plt.title('Scatter Plot with Colors and Sizes')
plt.colorbar(scatter, label='Color Scale')
plt.show()

# Task 3 Bar plot

You have lists of `categories` and `sales`, complete the following tasks:
* create a bar plot;
* set the title and labels for x and y axis;
* rotate the x ticks by 45 degrees;
* show it.

> Please, note: you can get more information about bar plot [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.bar.html).

In [6]:
categories = ["Category A", "Category B", "Category C", "Category D"]
sales = [350, 480, 210, 590]

# Task 3: Bar plot
# One fill colour per bar, listed in the same order as `categories`.
bar_palette = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A']

bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
bar_ax.bar(categories, sales, color=bar_palette)
bar_ax.set_title('Sales by Category')
bar_ax.set_xlabel('Categories')
bar_ax.set_ylabel('Sales')
# Tilt the category names by 45 degrees.
bar_ax.tick_params(axis='x', labelrotation=45)
# Re-fit the layout after rotating the tick labels.
bar_fig.tight_layout()
plt.show()


# Task 4 Histogram

You have randomly generated data, complete the following tasks:

* create a histogram;
* set the title and labels for x and y axis;
* make the bins narrower (i.e. use more bins than the default);
* show it.

> Please, note: you can get more information about histogram [here](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html).

In [8]:
# Sample from a seeded generator so the histogram is reproducible across runs.
rng = np.random.default_rng(42)
data = rng.normal(0, 1, 1000)  # 1000 draws with mean 0 and standard deviation 1

# Task 4: Histogram
plt.figure(figsize=(10, 6))
# 30 bins gives narrower bars than the default bin count.
plt.hist(data, bins=30, color='skyblue', alpha=0.7, edgecolor='black')
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.title('Histogram of Normal Distribution Data')
plt.grid(True, alpha=0.3)
plt.show()


# Task 5 Pie plot

You have lists of `labels` and `market_share`, complete the following tasks:
* create a pie plot;
* set the title;
* make numbers in format like `10.0%`;
* set the start angle for better view;
* show it.

> Please, note: you can get more information about pie plot [here](https://matplotlib.org/stable/gallery/pie_and_polar_charts/pie_features.html).

In [10]:
labels = ["Product A", "Product B", "Product C", "Product D"]
market_share = [30, 20, 15, 35]

# Task 5: Pie plot
# Wedge colours, in the same order as `labels`.
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A']

pie_fig, pie_ax = plt.subplots(figsize=(8, 8))
pie_ax.pie(
    market_share,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',  # one decimal place followed by a literal percent sign
    startangle=90,      # rotate the start of the pie by 90 degrees
)
pie_ax.set_title('Market Share Distribution')
# Equal axis scaling so the pie is drawn as a circle.
pie_ax.axis('equal')
plt.show()

# Task 6 Subplots

You have sin and cos functions, complete the following tasks:

* create a figure with 2 subplots;
* plot sin function in the first subplot;
* plot cos function in the second subplot;
* set the title and labels for x and y axis for each subplot;
* set the title for the figure;
* set different colors for each plot;
* show it horizontally.

> Please, note: you can get more information about subplots [here](https://matplotlib.org/stable/gallery/subplots_axes_and_figures/subplots_demo.html).

In [11]:
x = np.linspace(0, 5, 100)
y1 = np.sin(x)
y2 = np.cos(x)

# Task 6: Subplots
# One row, two columns: the panels sit side by side.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

# (axes, curve, line colour, y-axis label, panel title) for each panel.
panels = [
    (ax1, y1, 'red', 'Sin(X)', 'Sine Function'),
    (ax2, y2, 'blue', 'Cos(X)', 'Cosine Function'),
]
for panel_ax, curve, line_color, y_label, panel_title in panels:
    panel_ax.plot(x, curve, linewidth=2, color=line_color)
    panel_ax.set_xlabel('X Values')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(panel_title)
    panel_ax.grid(True, alpha=0.3)

# Figure-level title shown in addition to the two per-panel titles.
fig.suptitle('Trigonometric Functions')
fig.tight_layout()
plt.show()


# Task 7 Time Series

You have prepared data for time series, complete the following tasks:

* create a line plot;
* set the title and labels for x and y axis;
* rotate x ticks by 45 degrees;
* show it.

> Please, note: you can get more information about time series [here](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html).

In [13]:
# One timestamp per calendar day of 2023, paired with a running integer count.
dates = pd.date_range("2023-01-01", "2023-12-31", freq="D")
values = pd.Series(range(len(dates)), index=dates)

# Task 7: Time Series
ts_fig, ts_ax = plt.subplots(figsize=(12, 6))
ts_ax.plot(dates, values, color='purple', linewidth=2)
ts_ax.set_title('Time Series Data Throughout 2023')
ts_ax.set_xlabel('Date')
ts_ax.set_ylabel('Values')
# Tilt the date labels by 45 degrees.
ts_ax.tick_params(axis='x', labelrotation=45)
ts_ax.grid(True, alpha=0.3)
# Re-fit the layout after rotating the tick labels.
ts_fig.tight_layout()
plt.show()


# (Optional) Task 8 Surface 3D Plot

You have prepared data for 3D surface plot, complete the following tasks:
* create a figure;
* add a subplot for 3D surface plot;
* plot the surface, `cmap` can be `viridis`;
* set the title and labels for x, y and z axis;
* show it.

> Please, note: you can get more information about 3D surface plot [here](https://matplotlib.org/stable/gallery/mplot3d/surface3d.html).

In [14]:
# Sample z = x^2 + y^2 on a regular 100 x 100 grid over [-5, 5] x [-5, 5].
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = np.square(X) + np.square(Y)

# Task 8: Surface 3D Plot
# `Axes3D` is not referenced by name below.
# NOTE(review): presumably imported so that the '3d' projection is registered
# on older Matplotlib releases -- confirm before removing.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection='3d')
# Keep the returned surface: the colorbar below is built from it.
surface = ax.plot_surface(X, Y, Z, cmap='viridis', alpha=0.8)
ax.set_title('3D Surface Plot: Z = X² + Y²')
ax.set_xlabel('X axis')
ax.set_ylabel('Y axis')
ax.set_zlabel('Z axis')
fig.colorbar(surface)
plt.show()


# (Optional) Task 9

You are working for a retail company that wants to understand its customer base better. The company has collected various demographic and purchasing behavior data for a sample of customers. Your task is to create a scatter plot matrix to visualize the relationships between different variables and identify potential customer segments based on their characteristics. The scatter plot matrix will allow you to observe the pairwise relationships between multiple variables simultaneously.

> Please, note: you can find the file with data in the `data` folder.

In [15]:
# Task 9: Customer Data Analysis - Scatter Plot Matrix
# Loads the customer CSV, prints a short summary, then draws three figures:
# a pairwise scatter-plot matrix, an income-vs-spending scatter coloured by
# age, and a gender-share pie chart.

# Load customer data
customer_data = pd.read_csv('../data/customer_data.csv')

# Display basic info about the dataset
print("Dataset Info:")
print(customer_data.head())
print("\nDataset Shape:", customer_data.shape)
print("\nDataset Description:")
print(customer_data.describe())

# Create scatter plot matrix for numerical variables
# NOTE(review): these column names (and 'Gender' below) are assumed to match
# the CSV header exactly -- confirm against the data file.
numerical_cols = ['Age', 'Annual Income', 'Spending Score']

# Marker colour per gender, built once rather than on every loop iteration.
colors = {'Male': 'blue', 'Female': 'red'}
# Colour for any gender label missing from `colors`, so an unexpected category
# is still drawn instead of raising KeyError.
fallback_color = 'gray'
# Missing genders are dropped: NaN never compares equal to itself, so the
# equality mask used below could not select those rows anyway.
genders = customer_data['Gender'].dropna().unique()

# Create figure with one row and one column of subplots per variable;
# squeeze=False keeps `axes` two-dimensional for any number of columns.
n_vars = len(numerical_cols)
fig, axes = plt.subplots(n_vars, n_vars, figsize=(15, 15), squeeze=False)
fig.suptitle('Customer Data Scatter Plot Matrix', fontsize=16)

for i, col1 in enumerate(numerical_cols):
    for j, col2 in enumerate(numerical_cols):
        ax = axes[i, j]

        if i == j:
            # Diagonal: histograms
            ax.hist(customer_data[col1], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
            ax.set_title(f'Distribution of {col1}')
            continue

        # Off-diagonal: scatter plots colored by gender
        # (row variable on the y axis, column variable on the x axis).
        for gender in genders:
            mask = customer_data['Gender'] == gender
            ax.scatter(customer_data.loc[mask, col2], customer_data.loc[mask, col1],
                       c=colors.get(gender, fallback_color), label=gender, alpha=0.6, s=30)

        ax.set_xlabel(col2)
        ax.set_ylabel(col1)
        ax.set_title(f'{col1} vs {col2}')

        if i == 0 and j == 1:  # Add legend only once
            ax.legend()

plt.tight_layout()
plt.show()

# Additional analysis: Customer segmentation visualization
plt.figure(figsize=(12, 8))
# Keep the returned collection: the colorbar below is built from it.
scatter = plt.scatter(customer_data['Annual Income'], customer_data['Spending Score'],
                      c=customer_data['Age'], s=60, alpha=0.7, cmap='viridis')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.title('Customer Segmentation: Income vs Spending Score (colored by Age)')
plt.colorbar(scatter, label='Age')
plt.grid(True, alpha=0.3)
plt.show()

# Gender distribution pie chart
plt.figure(figsize=(8, 8))
gender_counts = customer_data['Gender'].value_counts()
# Choose each wedge colour by its label rather than by its position, so the
# colour coding stays the same whichever gender is more frequent and agrees
# with the blue/red coding used in the scatter matrix.
pie_palette = {'Male': 'lightblue', 'Female': 'lightpink'}
pie_colors = [pie_palette.get(label, 'lightgray') for label in gender_counts.index]
plt.pie(gender_counts.values, labels=gender_counts.index, autopct='%1.1f%%',
        startangle=90, colors=pie_colors)
plt.title('Gender Distribution in Customer Data')
plt.axis('equal')
plt.show()
