# Data Visualization with Matplotlib

This notebook demonstrates common plot types in Matplotlib with inline rendering. Each section explains the purpose of the visualization and key configuration parameters.

In [None]:
# Imports and basic setup
import matplotlib.pyplot as plt
import numpy as np

# Optional: Improve default aesthetics.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8';
# older releases only ship 'seaborn'. Use whichever name this installation
# provides so that a purely cosmetic step can never abort the notebook.
for _style in ('seaborn-v0_8', 'seaborn'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break

## Line Plot
A line plot is ideal for continuous data or functions. Here we visualize $\sin(x)$ and $\cos(x)$ over the same domain, with labels, legend, and grid for readability.

In [None]:
# Line plot: sine and cosine sampled at 100 evenly spaced points on [0, 10]
x = np.linspace(0, 10, 100)
y_sin = np.sin(x)
y_cos = np.cos(x)

fig, ax = plt.subplots(figsize=(10, 6))

# Both curves share the same x grid and line width; only data, legend label
# and color differ.
for curve, legend_text, tint in ((y_sin, 'sin(x)', 'blue'),
                                 (y_cos, 'cos(x)', 'red')):
    ax.plot(x, curve, label=legend_text, color=tint, linewidth=2)

ax.set_title('Sine and Cosine Functions', fontsize=16)
ax.set_xlabel('x', fontsize=12)
ax.set_ylabel('y', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)  # faint grid so it does not compete with the curves
plt.show()

## Bar Chart
Bar charts compare discrete categories. We plot quarterly revenue and annotate each bar with its value for quick interpretation.

In [None]:
# Bar chart: quarterly revenue, one color per quarter
categories = ['Q1', 'Q2', 'Q3', 'Q4']
values = [65000, 72000, 68000, 75000]
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A']

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(categories, values, color=colors)

# Write each bar's value, thousands-separated, just above its top edge and
# horizontally centered on the bar.
for rect in bars:
    top = rect.get_height()
    centre = rect.get_x() + rect.get_width() / 2.0
    ax.text(centre, top, f'$ {top:,.0f}',
            ha='center', va='bottom', fontsize=10)

ax.set_title('Quarterly Sales Revenue', fontsize=16)
ax.set_xlabel('Quarter', fontsize=12)
ax.set_ylabel('Revenue ($)', fontsize=12)
ax.grid(axis='y', alpha=0.3)  # horizontal guide lines only
plt.show()

## Scatter Plot
Scatter plots reveal relationships between variables. We generate synthetic data with a linear trend and overlay a trend line fitted by least squares. A colormap also encodes the `y` value, so each point's vertical position is reinforced by its color.

In [None]:
# Scatter plot with trend line and colorbar
np.random.seed(42)  # fixed seed so the synthetic sample is reproducible
x = np.random.randn(100)
y = 2 * x + np.random.randn(100) * 0.5  # linear trend (slope 2) plus noise

plt.figure(figsize=(10, 6))
# Color each point by its y value; `sc` is kept so the colorbar can use it.
sc = plt.scatter(x, y, alpha=0.6, c=y, cmap='viridis', s=50)

# Least squares fit (degree 1): z holds [slope, intercept]
z = np.polyfit(x, y, 1)
p = np.poly1d(z)

# Draw the fitted line between the two extreme x values only. Plotting it
# through the unsorted sample would make the path retrace itself, and the
# overlapping dashes would merge into an irregular, almost solid line.
x_line = np.linspace(x.min(), x.max(), 2)
# The '+' format flag always prints the intercept's sign, so a negative
# intercept reads "x-0.03" rather than "x+-0.03".
plt.plot(x_line, p(x_line), 'r--', alpha=0.8, linewidth=2,
         label=f'Trend: y={z[0]:.2f}x{z[1]:+.2f}')

plt.colorbar(sc, label='Y value')
plt.title('Scatter Plot with Trend Line', fontsize=16)
plt.xlabel('X', fontsize=12)
plt.ylabel('Y', fontsize=12)
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## Multiple Subplots
Subplots allow comparing multiple related visualizations in a grid. We show sine, cosine, an exponential curve, and a histogram of normally distributed samples in a 2x2 layout.

In [None]:
# 2x2 grid of subplots: three line panels plus one histogram
x = np.linspace(0, 10, 100)
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# (target axes, y data, format string, panel title) for each line panel
line_panels = [
    (axes[0, 0], np.sin(x), 'b-', 'Sine Wave'),
    (axes[0, 1], np.cos(x), 'r-', 'Cosine Wave'),
    (axes[1, 0], np.exp(x / 5), 'g-', 'Exponential Growth'),
]
for panel, curve, line_fmt, heading in line_panels:
    panel.plot(x, curve, line_fmt)
    panel.set_title(heading)

# Histogram of 1000 standard-normal samples in the bottom-right panel
data = np.random.randn(1000)
axes[1, 1].hist(data, bins=30, edgecolor='black', alpha=0.7)
axes[1, 1].set_title('Normal Distribution')

# Same faint grid on every panel
for panel in axes.flat:
    panel.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## Pie Chart
Pie charts show part-to-whole relationships. We visualize product share with an exploded first slice and enforce equal aspect ratio for a circular pie.

In [None]:
# Pie chart: product distribution
categories = ['Product A', 'Product B', 'Product C', 'Product D']
sizes = [30, 25, 20, 25]
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A']
explode = (0.1, 0, 0, 0)  # offset only the first wedge

# Wedge appearance options, gathered in one place
pie_options = {
    'explode': explode,
    'labels': categories,
    'colors': colors,
    'autopct': '%1.1f%%',  # percentage with one decimal inside each wedge
    'shadow': True,
    'startangle': 90,
}

fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(sizes, **pie_options)
ax.set_title('Product Sales Distribution', fontsize=16)
ax.axis('equal')  # equal axis scaling keeps the pie circular
plt.show()

## Box Plot
Box plots summarize the distribution of numeric variables, highlighting median, quartiles, and potential outliers. They are useful for quick comparisons across multiple features.

In [None]:
# Box plot for multiple distributions
np.random.seed(123)  # fixed seed so the synthetic samples are reproducible
# One column per feature, 500 normally distributed samples each.
data_box = np.column_stack([
    np.random.normal(loc=0.0, scale=1.0, size=500),
    np.random.normal(loc=2.0, scale=0.5, size=500),
    np.random.normal(loc=-1.5, scale=1.2, size=500)
])
labels = ['Feature A', 'Feature B', 'Feature C']

plt.figure(figsize=(10, 6))
plt.boxplot(data_box, showmeans=True)
# boxplot's `labels=` keyword is deprecated since Matplotlib 3.9 (renamed to
# `tick_labels`). Setting the tick labels explicitly works on every version;
# boxes are drawn at positions 1..N by default.
plt.xticks(ticks=range(1, len(labels) + 1), labels=labels)
plt.title('Box Plot of Three Features', fontsize=16)
plt.ylabel('Value', fontsize=12)
plt.grid(axis='y', alpha=0.3)
plt.show()

## Pandas Integration
We use Pandas to load a CSV and perform basic plotting from a DataFrame. This demonstrates idiomatic data exploration workflows using Matplotlib as the rendering backend.

In [None]:
# Load sample data with Pandas and preview
import pandas as pd

# A relative path is resolved against the kernel's working directory, so this
# assumes sample_data.csv sits next to the notebook (in notebooks/) and that
# the kernel was started from that folder.
csv_path = 'sample_data.csv'
df = pd.read_csv(csv_path)
df.head(n=5)  # preview the first five rows

In [None]:
# DataFrame line plot for numeric columns (at most the first 3)
numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
# Slicing already copes with fewer than three entries, so no length check is
# needed to pick "up to three" columns.
use_cols = numeric_cols[:3]

if use_cols:
    plt.figure(figsize=(10, 6))
    # Draw onto the figure created above instead of letting pandas open its own.
    df[use_cols].plot(ax=plt.gca(), linewidth=2)
    plt.title('Pandas DataFrame: Line Plot of Numeric Columns', fontsize=16)
    plt.xlabel('Index', fontsize=12)
    plt.ylabel('Value', fontsize=12)
    plt.grid(True, alpha=0.3)
    plt.show()
else:
    # Plotting an empty column selection raises inside pandas; report the
    # situation instead, as the correlation heatmap cell does.
    print('No numeric columns available for line plot.')

## Correlation Heatmap
A correlation heatmap displays pairwise correlations among numeric features. Positive correlations appear as warm colors; negative correlations as cool colors.

In [None]:
# Correlation matrix of the numeric columns, rendered as an image
numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
if len(numeric_cols) < 2:
    # A pairwise correlation needs at least two numeric columns.
    print('Not enough numeric columns for correlation heatmap.')
else:
    corr = df[numeric_cols].corr()
    col_positions = range(len(corr.columns))
    row_positions = range(len(corr.index))

    plt.figure(figsize=(8, 6))
    # Pin the color scale to [-1, 1] so colors are comparable across datasets.
    im = plt.imshow(corr, cmap='coolwarm', vmin=-1, vmax=1)
    plt.colorbar(im, fraction=0.046, pad=0.04)
    # One tick per matrix column/row, labelled with the feature name.
    plt.xticks(ticks=col_positions, labels=corr.columns, rotation=45, ha='right')
    plt.yticks(ticks=row_positions, labels=corr.index)
    plt.title('Correlation Heatmap', fontsize=16)
    plt.tight_layout()
    plt.show()

## Area / Stack Plot
Area charts emphasize cumulative totals over a domain. We create a small synthetic dataset and show a stacked area chart for three series.

In [None]:
# Stacked area chart using synthetic time series
x = np.arange(20)
# Shifted sinusoids, clipped at zero from below so every layer is non-negative.
series1 = np.clip(np.sin(x/2) + 1.0, 0, None)
series2 = np.clip(np.cos(x/3) + 1.2, 0, None)
series3 = np.clip(np.sin(x/4 + 0.5) + 0.8, 0, None)

layers = [series1, series2, series3]
layer_names = ['Series 1','Series 2','Series 3']
layer_colors = ['#45B7D1','#FFA07A','#4ECDC4']

fig, ax = plt.subplots(figsize=(10, 6))
ax.stackplot(x, *layers, labels=layer_names, colors=layer_colors, alpha=0.8)
ax.legend(loc='upper left')
ax.set_title('Stacked Area Chart', fontsize=16)
ax.set_xlabel('Time', fontsize=12)
ax.set_ylabel('Value', fontsize=12)
ax.grid(True, alpha=0.3)
plt.show()