# Data Visualization Template

**Author:** Your Name

**Date:** YYYY-MM-DD

**Purpose:** Create comprehensive visualizations for data analysis

---

## 1. Setup and Imports

In [None]:
# Data manipulation
import numpy as np
import pandas as pd

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Matplotlib settings
%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 11
plt.rcParams['axes.labelsize'] = 12
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 10

# Seaborn settings
sns.set_palette('husl')
sns.set_context('notebook', font_scale=1.1)

## 2. Load Data

In [None]:
# Load your dataset
# df = pd.read_csv('data/your_dataset.csv')
# df.head()

## 3. Distribution Plots

Visualize the distribution of variables.

In [None]:
# Histogram
# plt.figure(figsize=(12, 5))
# plt.subplot(1, 2, 1)
# plt.hist(df['numeric_column'], bins=30, edgecolor='black', alpha=0.7)
# plt.xlabel('Value')
# plt.ylabel('Frequency')
# plt.title('Distribution of Numeric Column')

# # Density plot
# plt.subplot(1, 2, 2)
# df['numeric_column'].plot(kind='density', linewidth=2)
# plt.xlabel('Value')
# plt.ylabel('Density')
# plt.title('Density Plot')
# plt.tight_layout()
# plt.show()

In [None]:
# Box plot for outlier detection
# plt.figure(figsize=(10, 6))
# sns.boxplot(data=df, x='category_column', y='numeric_column')
# plt.xticks(rotation=45)
# plt.title('Box Plot by Category')
# plt.tight_layout()
# plt.show()

In [None]:
# Violin plot
# plt.figure(figsize=(12, 6))
# sns.violinplot(data=df, x='category_column', y='numeric_column', hue='subcategory')
# plt.title('Violin Plot')
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
# plt.tight_layout()
# plt.show()

## 4. Relationship Plots

Explore relationships between variables.

In [None]:
# Scatter plot
# plt.figure(figsize=(10, 6))
# plt.scatter(df['x_column'], df['y_column'], alpha=0.6, s=50)
# plt.xlabel('X Variable')
# plt.ylabel('Y Variable')
# plt.title('Scatter Plot')
# plt.grid(True, alpha=0.3)
# plt.show()

In [None]:
# Scatter plot with regression line
# plt.figure(figsize=(10, 6))
# sns.regplot(data=df, x='x_column', y='y_column', scatter_kws={'alpha':0.5})
# plt.title('Scatter Plot with Regression Line')
# plt.show()

In [None]:
# Pair plot
# numeric_cols = df.select_dtypes(include=[np.number]).columns[:4]  # First 4 numeric columns
# sns.pairplot(df[numeric_cols], diag_kind='kde', plot_kws={'alpha': 0.6})
# plt.suptitle('Pair Plot', y=1.02)
# plt.show()

## 5. Correlation Analysis

In [None]:
# Correlation heatmap
# NOTE: since pandas 2.0, DataFrame.corr() defaults to numeric_only=False and
# raises on non-numeric (e.g. categorical/string) columns, so restrict the
# computation to numeric columns explicitly.
# plt.figure(figsize=(12, 10))
# correlation_matrix = df.corr(numeric_only=True)
# sns.heatmap(correlation_matrix,
#             annot=True,
#             fmt='.2f',
#             cmap='coolwarm',
#             center=0,
#             square=True,
#             linewidths=1,
#             cbar_kws={"shrink": 0.8})
# plt.title('Correlation Matrix Heatmap')
# plt.tight_layout()
# plt.show()

## 6. Time Series Visualization

In [None]:
# Time series plot
# if 'date_column' in df.columns:
#     df['date_column'] = pd.to_datetime(df['date_column'])
#     df_sorted = df.sort_values('date_column')
#     
#     plt.figure(figsize=(14, 6))
#     plt.plot(df_sorted['date_column'], df_sorted['value_column'], linewidth=2)
#     plt.xlabel('Date')
#     plt.ylabel('Value')
#     plt.title('Time Series Plot')
#     plt.xticks(rotation=45)
#     plt.grid(True, alpha=0.3)
#     plt.tight_layout()
#     plt.show()

## 7. Categorical Data Visualization

In [None]:
# Bar plot
# category_counts = df['category_column'].value_counts()
# plt.figure(figsize=(10, 6))
# category_counts.plot(kind='bar', color='steelblue', edgecolor='black')
# plt.xlabel('Category')
# plt.ylabel('Count')
# plt.title('Distribution by Category')
# plt.xticks(rotation=45)
# plt.tight_layout()
# plt.show()

In [None]:
# Pie chart
# Recompute the counts here so this cell also runs on its own, without
# relying on `category_counts` left over from the bar plot cell.
# category_counts = df['category_column'].value_counts()
# plt.figure(figsize=(10, 8))
# category_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90)
# plt.ylabel('')
# plt.title('Distribution Pie Chart')
# plt.show()

In [None]:
# Grouped bar plot
# grouped_data = df.groupby(['category1', 'category2'])['value'].mean().unstack()
# grouped_data.plot(kind='bar', figsize=(12, 6), width=0.8)
# plt.xlabel('Category 1')
# plt.ylabel('Average Value')
# plt.title('Grouped Bar Plot')
# plt.legend(title='Category 2')
# plt.xticks(rotation=45)
# plt.tight_layout()
# plt.show()

## 8. Interactive Visualizations (Plotly)

In [None]:
# Interactive scatter plot
# fig = px.scatter(df, 
#                  x='x_column', 
#                  y='y_column',
#                  color='category_column',
#                  size='size_column',
#                  hover_data=['additional_info'],
#                  title='Interactive Scatter Plot')
# fig.show()

In [None]:
# Interactive line chart
# fig = px.line(df, 
#               x='date_column', 
#               y='value_column',
#               color='category',
#               title='Interactive Line Chart')
# fig.update_layout(hovermode='x unified')
# fig.show()

In [None]:
# Interactive 3D scatter plot
# fig = px.scatter_3d(df, 
#                     x='x_column', 
#                     y='y_column', 
#                     z='z_column',
#                     color='category_column',
#                     title='3D Scatter Plot')
# fig.show()

## 9. Advanced Visualizations

In [None]:
# Subplots with different plot types
# Builds a 2x2 grid and draws one plot type per axes object (explicit
# `ax`-based interface, so each panel is configured independently).
# fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# # Subplot 1: Histogram
# axes[0, 0].hist(df['column1'], bins=30, edgecolor='black', alpha=0.7)
# axes[0, 0].set_title('Histogram')
# axes[0, 0].set_xlabel('Value')
# axes[0, 0].set_ylabel('Frequency')

# # Subplot 2: Scatter plot
# axes[0, 1].scatter(df['column2'], df['column3'], alpha=0.6)
# axes[0, 1].set_title('Scatter Plot')
# axes[0, 1].set_xlabel('Column 2')
# axes[0, 1].set_ylabel('Column 3')

# # Subplot 3: Box plot
# df.boxplot(column='column1', by='category', ax=axes[1, 0])
# axes[1, 0].set_title('Box Plot by Category')
# fig.suptitle('')  # pandas boxplot(by=...) adds a figure-level "Boxplot grouped by ..." title; clear it

# # Subplot 4: Bar plot
# df['category'].value_counts().plot(kind='bar', ax=axes[1, 1])
# axes[1, 1].set_title('Category Distribution')

# plt.tight_layout()
# plt.show()

## 10. Save Figures

In [None]:
# Save figures
# NOTE: savefig() does not create missing directories -- make sure 'output/'
# exists first, e.g. os.makedirs('output', exist_ok=True) (needs `import os`).
# Call savefig() before plt.show(): with the inline backend the figure is
# closed once it has been displayed, so saving afterwards writes a blank image.
# plt.figure(figsize=(12, 6))
# # Create your plot here
# plt.savefig('output/figure_name.png', dpi=300, bbox_inches='tight')
# plt.savefig('output/figure_name.pdf', bbox_inches='tight')
# print("Figures saved successfully")

## Summary

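This notebook provides commented-out templates for the visualization techniques listed below. To use one, uncomment its cell and replace the placeholder column names with columns from your dataset: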

- Distribution plots (histograms, density, box, violin)
- Relationship plots (scatter, regression, pair plots)
- Correlation analysis (heatmaps)
- Time series visualization
- Categorical data visualization (bar, pie charts)
- Interactive visualizations with Plotly
- Multi-panel layouts
- Saving figures to disk (PNG, PDF)

Choose the appropriate visualization type based on:
- Data type (numerical, categorical, temporal)
- Number of variables
- Analysis objective
- Target audience