# 📊 Feb 07: Seaborn Basics - Practical Examples

Learn to create beautiful statistical visualizations with Seaborn.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Apply Seaborn's global theme once, up front: the "whitegrid" style and the
# "husl" colour palette become the defaults for the plots drawn below.
sns.set_theme(style="whitegrid", palette="husl")

# IPython magic: render Matplotlib figures inline in the notebook output.
%matplotlib inline

## 1. Creating Sample Data

In [None]:
# Build the reproducible synthetic employee table used by the examples below.
np.random.seed(42)

n_employees = 200
department_names = ['Sales', 'Engineering', 'Marketing', 'HR']
performance_levels = ['Low', 'Medium', 'High']

# Draw the columns one by one, in a fixed order, so the seeded random stream
# always yields the same table.
department_col = np.random.choice(department_names, n_employees)
salary_col = np.random.normal(60000, 15000, n_employees)
experience_col = np.random.randint(0, 20, n_employees)
age_col = np.random.randint(22, 65, n_employees)
performance_col = np.random.choice(performance_levels, n_employees, p=[0.2, 0.5, 0.3])

employees = pd.DataFrame({
    'Department': department_col,
    # A normal draw can be negative; take the absolute value so every salary
    # is non-negative.
    'Salary': np.abs(salary_col),
    'Experience': experience_col,
    'Age': age_col,
    'Performance': performance_col,
})

# Quick sanity check: first rows and overall size.
print(employees.head())
print(f"\nDataset shape: {employees.shape}")

## 2. Distribution Plots - Histogram

In [None]:
# Histogram of all salaries with a KDE curve drawn on top.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=employees, x='Salary', bins=30, kde=True, color='#3498db', ax=ax)

ax.set_title('Salary Distribution', fontsize=14, fontweight='bold')
ax.set_xlabel('Salary ($)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)

fig.tight_layout()
plt.show()

## 3. Distribution by Category

In [None]:
# Overlaid histograms (one per department) with a KDE curve for each.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(data=employees, x='Salary', hue='Department', kde=True, bins=25,
             alpha=0.6, ax=ax)

ax.set_title('Salary Distribution by Department', fontsize=14, fontweight='bold')
ax.set_xlabel('Salary ($)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)

# Seaborn's distribution plots build their legend from private handles that are
# not registered as labelled artists on the Axes. Calling plt.legend() here
# would therefore find nothing to show and replace the legend with an empty
# one. sns.move_legend re-creates the existing legend with the new settings.
sns.move_legend(ax, 'best', title='Department', fontsize=10)

fig.tight_layout()
plt.show()

## 4. KDE Plot - Smooth Distribution

In [None]:
# Filled KDE curves: one smooth salary density per performance level.
fig, ax = plt.subplots(figsize=(12, 6))
sns.kdeplot(data=employees, x='Salary', hue='Performance', fill=True, alpha=0.5,
            linewidth=2, ax=ax)

ax.set_title('Salary Distribution by Performance Level', fontsize=14, fontweight='bold')
ax.set_xlabel('Salary ($)', fontsize=12)
ax.set_ylabel('Density', fontsize=12)

# kdeplot's hue legend is built from handles that are not labelled artists on
# the Axes, so plt.legend() would replace it with an empty legend. Restyle the
# legend seaborn already drew instead.
sns.move_legend(ax, 'best', title='Performance', fontsize=10)

fig.tight_layout()
plt.show()

## 5. Box Plot - Distribution with Quartiles

In [None]:
# Box plot: median, quartiles and outliers of salary for each department.
fig, ax = plt.subplots(figsize=(10, 6))

# Passing `palette` without `hue` is deprecated (seaborn 0.13) and slated for
# removal. Mapping the x variable to `hue` with the legend turned off gives the
# same one-colour-per-department result without the warning.
sns.boxplot(data=employees, x='Department', y='Salary', hue='Department',
            palette='Set2', legend=False, ax=ax)

ax.set_title('Salary Distribution by Department (Box Plot)', fontsize=14, fontweight='bold')
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Salary ($)', fontsize=12)
ax.tick_params(axis='x', rotation=0)  # keep department names horizontal

fig.tight_layout()
plt.show()

## 6. Violin Plot - Distribution Shape

In [None]:
# Violin plot: a box-plot summary wrapped in a mirrored KDE, drawn side by side
# for each performance level within every department.
fig, ax = plt.subplots(figsize=(12, 6))
sns.violinplot(
    data=employees,
    x='Department',
    y='Salary',
    hue='Performance',
    split=False,
    palette='muted',
    ax=ax,
)

ax.set_title('Salary Distribution by Department and Performance',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Salary ($)', fontsize=12)
ax.legend(title='Performance', fontsize=10)

fig.tight_layout()
plt.show()

## 7. Bar Plot - Mean with Confidence Interval

In [None]:
# Bar plot: bar height is the mean salary per department; the error bars show
# the confidence interval around that mean.
fig, ax = plt.subplots(figsize=(10, 6))

# `palette` without `hue` is deprecated (seaborn 0.13). Assigning the x variable
# to `hue` and disabling the legend keeps one colour per department.
sns.barplot(data=employees, x='Department', y='Salary', hue='Department',
            palette='viridis', legend=False, errorbar='ci', capsize=0.1, ax=ax)

ax.set_title('Average Salary by Department (with 95% CI)', fontsize=14, fontweight='bold')
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Average Salary ($)', fontsize=12)

fig.tight_layout()
plt.show()

## 8. Count Plot - Category Frequencies

In [None]:
# Count plot: number of employees per department, split by performance level.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=employees, x='Department', hue='Performance', palette='Set1', ax=ax)

ax.set_title('Employee Count by Department and Performance',
             fontsize=14, fontweight='bold')
ax.set_xlabel('Department', fontsize=12)
ax.set_ylabel('Count', fontsize=12)
ax.legend(title='Performance', fontsize=10)

fig.tight_layout()
plt.show()

## 9. Scatter Plot - Relationships

In [None]:
# Scatter plot: experience against salary, with colour encoding the department
# and marker size encoding the employee's age.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=employees,
    x='Experience',
    y='Salary',
    hue='Department',
    size='Age',
    sizes=(50, 300),
    alpha=0.7,
    ax=ax,
)

ax.set_title('Experience vs Salary by Department', fontsize=14, fontweight='bold')
ax.set_xlabel('Years of Experience', fontsize=12)
ax.set_ylabel('Salary ($)', fontsize=12)

# Park the (long) legend just outside the right edge of the plot area.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=10)

fig.tight_layout()
plt.show()

## 10. Regression Plot - Trend Line

In [None]:
# Scatter of experience vs salary with a fitted linear trend line on top.
point_style = dict(alpha=0.5, s=50)
trend_style = dict(color='red', linewidth=2)

fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(data=employees, x='Experience', y='Salary',
            scatter_kws=point_style, line_kws=trend_style, ax=ax)

ax.set_title('Experience vs Salary with Trend Line', fontsize=14, fontweight='bold')
ax.set_xlabel('Years of Experience', fontsize=12)
ax.set_ylabel('Salary ($)', fontsize=12)

fig.tight_layout()
plt.show()

## 11. Exploring Different Styles

In [None]:
# Compare Seaborn's built-in axes styles side by side on the same box plot.
styles = ['whitegrid', 'darkgrid', 'white', 'dark', 'ticks']

fig = plt.figure(figsize=(15, 10))

for position, style in enumerate(styles, start=1):
    # Style settings are read when an Axes is *created*. Creating all panels
    # first and calling sns.set_style() afterwards leaves every panel in the
    # same style, so each Axes is created inside its own axes_style context.
    with sns.axes_style(style):
        ax = fig.add_subplot(2, 3, position)
        # hue=x with legend=False replaces the deprecated "palette without
        # hue" form and still gives one colour per department.
        sns.boxplot(data=employees, x='Department', y='Salary', hue='Department',
                    palette='Set2', legend=False, ax=ax)
    ax.set_title(f'Style: {style}', fontweight='bold')
    ax.set_xlabel('')
    ax.set_ylabel('Salary')
    ax.tick_params(axis='x', rotation=45)

# Only len(styles) panels are added, so the unused slot of the 2x3 grid simply
# stays empty; there is no spare Axes to hide.
fig.suptitle('Seaborn Style Comparison', fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

# Make sure the cells that follow use the 'whitegrid' style.
sns.set_style('whitegrid')

## 12. Color Palettes Comparison

In [None]:
# Draw the same count plot six times, once per colour palette, on a 2x3 grid.
palettes = ['Set2', 'husl', 'viridis', 'muted', 'pastel', 'bright']

fig = plt.figure(figsize=(15, 10))
axes = fig.subplots(2, 3).ravel()

for ax, palette in zip(axes, palettes):
    sns.countplot(data=employees, x='Department', hue='Performance',
                  palette=palette, ax=ax)
    ax.set_title(f'Palette: {palette}', fontweight='bold')
    ax.set_xlabel('')
    ax.set_ylabel('Count')
    ax.legend(title='Performance', fontsize=8)

fig.suptitle('Seaborn Color Palette Comparison', fontsize=16, fontweight='bold')
fig.tight_layout()
plt.show()

## 13. Real-World Example: Sales Analysis

In [None]:
# Build a reproducible synthetic sales table for the dashboard example.
np.random.seed(42)
n_sales = 300

# Columns are drawn one at a time, in a fixed order, so the seeded random
# stream always produces the same table.
region_col = np.random.choice(['North', 'South', 'East', 'West'], n_sales)
product_col = np.random.choice(['A', 'B', 'C'], n_sales)
revenue_col = np.random.gamma(2, 10000, n_sales)  # right-skewed, always positive
units_col = np.random.randint(10, 200, n_sales)
quarter_col = np.random.choice(['Q1', 'Q2', 'Q3', 'Q4'], n_sales)

sales_data = pd.DataFrame({
    'Region': region_col,
    'Product': product_col,
    'Sales': revenue_col,
    'Units': units_col,
    'Quarter': quarter_col,
})

print(sales_data.head())

In [None]:
# Sales dashboard: six complementary views of `sales_data` on one 2x3 grid.
# The whole grid is created in a single call and each plot is drawn on its own
# Axes, rather than relying on pyplot's "current axes" state.
fig, axes = plt.subplots(2, 3, figsize=(16, 12))
ax1, ax2, ax3, ax4, ax5, ax6 = axes.flat

# 1. Sales distribution
sns.histplot(data=sales_data, x='Sales', kde=True, bins=30, color='#3498db', ax=ax1)
ax1.set_title('Sales Distribution', fontweight='bold')
ax1.set_xlabel('Sales ($)')

# 2. Sales by region
# `palette` without `hue` is deprecated (seaborn 0.13); mapping the x variable
# to `hue` with the legend off keeps one colour per category. The same form is
# used for panels 3 and 6.
sns.boxplot(data=sales_data, x='Region', y='Sales', hue='Region',
            palette='Set2', legend=False, ax=ax2)
ax2.set_title('Sales by Region', fontweight='bold')
ax2.set_xlabel('Region')
ax2.set_ylabel('Sales ($)')

# 3. Product performance (bar height = mean sales per product)
sns.barplot(data=sales_data, x='Product', y='Sales', hue='Product',
            palette='viridis', legend=False, ax=ax3)
ax3.set_title('Average Sales by Product', fontweight='bold')
ax3.set_xlabel('Product')
ax3.set_ylabel('Average Sales ($)')

# 4. Regional distribution (number of sales records per region and product)
sns.countplot(data=sales_data, x='Region', hue='Product', palette='Set1', ax=ax4)
ax4.set_title('Sales Count by Region and Product', fontweight='bold')
ax4.set_xlabel('Region')
ax4.set_ylabel('Count')
ax4.legend(title='Product', fontsize=9)

# 5. Units vs Sales
sns.scatterplot(data=sales_data, x='Units', y='Sales', hue='Product',
                alpha=0.6, s=80, ax=ax5)
ax5.set_title('Units Sold vs Sales Revenue', fontweight='bold')
ax5.set_xlabel('Units Sold')
ax5.set_ylabel('Sales ($)')
ax5.legend(title='Product', fontsize=9)

# 6. Quarterly performance
sns.violinplot(data=sales_data, x='Quarter', y='Sales', hue='Quarter',
               palette='muted', legend=False, ax=ax6)
ax6.set_title('Sales Distribution by Quarter', fontweight='bold')
ax6.set_xlabel('Quarter')
ax6.set_ylabel('Sales ($)')

# y just below 1.0 keeps the overall title clear of the top row of panels.
fig.suptitle('Sales Performance Dashboard', fontsize=16, fontweight='bold', y=0.995)
fig.tight_layout()
plt.show()

## 🏆 Practice Exercise

Using the employees dataset, create:

1. A **histogram** showing age distribution with KDE
2. A **box plot** comparing experience across departments
3. A **violin plot** showing salary by performance level
4. A **scatter plot** showing age vs salary colored by department
5. A **count plot** showing performance distribution by department

Make them look professional with proper titles, labels, and color palettes!

In [None]:
# Your code here
# Create 5 different Seaborn visualizations


## Key Takeaways

✅ **Seaborn** makes beautiful plots with minimal code  
✅ Use **histplot** and **kdeplot** for distributions  
✅ **Box plots** show quartiles and outliers; **violin plots** add the shape of the distribution  
✅ **Bar plots** automatically calculate means and confidence intervals  
✅ Use the **hue** parameter to add categorical dimensions  
✅ **Themes** and **palettes** dramatically improve aesthetics  
✅ Seaborn integrates seamlessly with **Pandas DataFrames**  
✅ Combine with **Matplotlib** for fine-grained control  