# Lesson 4.1: Matplotlib Basics

## Why Visualize?

"A picture is worth a thousand rows of data."

Before building any ML model, you MUST look at your data visually. Patterns, outliers, and relationships that are invisible in numbers become obvious in charts.

**Matplotlib** is the foundation of plotting in Python - many other libraries (such as seaborn and pandas' built-in `.plot()`) are built on top of it.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

%matplotlib inline
# This makes plots show directly in the notebook

In [None]:
# LINE PLOT - great for trends over time
# Seed the generator so the noisy readings (and therefore the chart) are
# identical on every run; the scatter and histogram cells seed the same way.
np.random.seed(42)
days = np.arange(1, 31)  # day numbers 1..30
# Linear upward trend (40 ppm base, +0.5 ppm per day) plus Gaussian noise;
# the noise length follows `days` so the two arrays can never get out of sync.
tds_readings = 40 + days * 0.5 + np.random.randn(days.size) * 3

plt.figure(figsize=(10, 4))
plt.plot(days, tds_readings, color='blue', linewidth=2, label='TDS Output')
# Dashed horizontal reference line at the 80 ppm warning limit
plt.axhline(y=80, color='red', linestyle='--', label='Warning Limit (80 ppm)')
plt.xlabel('Day')
plt.ylabel('TDS (ppm)')
plt.title('Water Filter TDS Over 30 Days')
plt.legend()
plt.grid(True, alpha=0.3)  # faint grid so it does not compete with the data
plt.show()

In [None]:
# BAR CHART - comparing categories
regions = ['North', 'South', 'East', 'West']
avg_tds = [52, 85, 115, 45]

# Traffic-light colour per bar: 100 and above -> red,
# 80 up to (but not including) 100 -> orange, anything lower -> green.
colors = [
    'red' if reading >= 100 else 'orange' if reading >= 80 else 'green'
    for reading in avg_tds
]

plt.figure(figsize=(8, 4))
plt.bar(regions, avg_tds, color=colors)
plt.title('Average TDS by Region')
plt.ylabel('Average TDS (ppm)')
# Semi-transparent dashed line at 80 ppm, the green/orange boundary
plt.axhline(y=80, linestyle='--', color='red', alpha=0.5)
plt.show()

In [None]:
# SCATTER PLOT - relationship between two variables
np.random.seed(42)  # fixed seed: the same synthetic points on every run

# 50 random integer filter ages; the upper bound is exclusive, so 10..364 days
filter_age = np.random.randint(low=10, high=365, size=50)
# TDS grows linearly with age (30 ppm base, +0.3 ppm per day) plus noise
tds_output = 30 + filter_age * 0.3 + np.random.randn(50) * 15

plt.figure(figsize=(8, 5))
plt.scatter(filter_age, tds_output, c='steelblue', alpha=0.6)
# Dashed reference line at 100 ppm; its label is what the legend displays
plt.axhline(y=100, linestyle='--', color='red', label='Alert threshold')
plt.title('Filter Age vs TDS Output')
plt.xlabel('Filter Age (days)')
plt.ylabel('TDS Output (ppm)')
plt.legend()
plt.show()
# You can clearly see: older filters → higher TDS!

In [None]:
# HISTOGRAM - distribution of values
np.random.seed(42)  # fixed seed: the same synthetic sample on every run

# Two normal populations joined into one sample:
# 200 readings centred on 50 ppm and 50 readings centred on 120 ppm.
all_tds = np.concatenate((
    np.random.normal(loc=50, scale=15, size=200),
    np.random.normal(loc=120, scale=20, size=50),
))

plt.figure(figsize=(8, 4))
plt.hist(all_tds, bins=30, alpha=0.7, color='steelblue', edgecolor='white')
# Dashed vertical line at 100 ppm, labelled for the legend
plt.axvline(x=100, linestyle='--', color='red', label='Alert Limit')
plt.title('Distribution of TDS Readings')
plt.xlabel('TDS Output (ppm)')
plt.ylabel('Count')
plt.legend()
plt.show()

In [None]:
# SUBPLOTS - multiple charts in one figure
# Reuses days, tds_readings, regions, avg_tds, colors, filter_age and
# tds_output from the earlier cells, so run those cells first.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))  # 1 row x 3 columns of Axes

# Plot 1: TDS over time
axes[0].plot(days, tds_readings)
axes[0].set_title('TDS Trend')
axes[0].set_xlabel('Day')
axes[0].set_ylabel('TDS (ppm)')

# Plot 2: TDS by region
axes[1].bar(regions, avg_tds, color=colors)
axes[1].set_title('TDS by Region')
axes[1].set_ylabel('Average TDS (ppm)')

# Plot 3: Age vs TDS
axes[2].scatter(filter_age, tds_output, alpha=0.5)
axes[2].set_title('Age vs TDS')
axes[2].set_xlabel('Filter Age (days)')
axes[2].set_ylabel('TDS Output (ppm)')

# Adjust spacing so titles and axis labels of neighbouring panels do not overlap
plt.tight_layout()
plt.show()

## Exercise

1. Create a line plot of `flow_rate` decreasing over 30 days
2. Create a bar chart comparing 5 filters' TDS readings
3. Make a 2x2 subplot grid with 4 different chart types

In [None]:
# YOUR CODE HERE