# Example Notebook

Copyright 2025 [Your Name/Organization]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

---

This notebook demonstrates the project's coding standards and provides examples of data analysis and visualization.

## Setup and Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import List, Dict, Optional

# Start from matplotlib's stock style, then set the notebook-wide defaults
# (figure size in inches, base font size in points) in a single update.
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'font.size': 12,
})

## Data Generation Example

In [None]:
def generate_sample_data(n_samples: int = 100, seed: Optional[int] = None) -> pd.DataFrame:
    """
    Generate sample data for demonstration purposes.

    Columns are drawn in this order: ``x`` (standard normal), ``y`` (normal
    scaled by 2 and shifted by 1), ``category`` (random choice among 'A', 'B',
    'C') and ``value`` (exponential with scale 2).

    Args:
        n_samples (int): Number of samples to generate. Must be a positive
            integer; NumPy integer scalars are accepted as well.
        seed (Optional[int]): Random seed for reproducibility. When given, a
            private generator is seeded so NumPy's global random state is
            left untouched. When None, values are drawn from NumPy's global
            random state.

    Returns:
        pd.DataFrame: Generated sample data with ``n_samples`` rows.

    Raises:
        AssertionError: If ``n_samples`` is not a positive integer, or if
            ``seed`` is neither an integer nor None.
    """
    # Accept NumPy integer scalars too (e.g. values coming from len()/shape math on arrays)
    integer_types = (int, np.integer)
    assert isinstance(n_samples, integer_types) and n_samples > 0, 'n_samples must be a positive integer'
    assert seed is None or isinstance(seed, integer_types), 'seed must be an integer or None'

    # A private RandomState(seed) produces the same stream that
    # np.random.seed(seed) followed by np.random.* calls would, but without
    # reseeding the process-wide generator as a hidden side effect.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order matters for reproducibility: x, y, category, value.
    data = {
        'x': rng.randn(n_samples),
        'y': rng.randn(n_samples) * 2 + 1,
        'category': rng.choice(['A', 'B', 'C'], n_samples),
        'value': rng.exponential(2, n_samples)
    }

    return pd.DataFrame(data)

# Build the 200-row demo frame with a fixed seed so reruns produce the same data
df = generate_sample_data(seed=42, n_samples=200)
print(f'Generated dataset with shape: {df.shape}')
# Trailing expression: the notebook renders the first rows as the cell output
df.head()

## Data Analysis

In [None]:
# Descriptive statistics for the numeric columns
summary_stats = df.describe()
print('Dataset Summary:')
print(summary_stats)

# Number of rows per category label
category_counts = df['category'].value_counts()
print('\nCategory Distribution:')
print(category_counts)

## Visualization Examples

In [None]:
# Build a 2x2 grid of panels summarising the sample data
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Top-left: scatter plot of x against y
axes[0, 0].scatter(df['x'], df['y'], alpha=0.6, c='blue')
axes[0, 0].set_xlabel('X values')
axes[0, 0].set_ylabel('Y values')
axes[0, 0].set_title('Scatter Plot: X vs Y')
axes[0, 0].grid(True, alpha=0.3)

# Top-right: histogram of the 'value' column
axes[0, 1].hist(df['value'], bins=20, alpha=0.7, color='green', edgecolor='black')
axes[0, 1].set_xlabel('Value')
axes[0, 1].set_ylabel('Frequency')
axes[0, 1].set_title('Distribution of Values')
axes[0, 1].grid(True, alpha=0.3)

# Bottom-left: box plot of 'value' per category.
# Categories are sorted so the box order is stable instead of following
# whichever label happens to appear first in the data.
categories = sorted(df['category'].unique())
box_data = [df.loc[df['category'] == cat, 'value'] for cat in categories]
axes[1, 0].boxplot(box_data)
# Label the boxes through the tick labels rather than boxplot(labels=...):
# that keyword is deprecated since Matplotlib 3.9 (renamed to tick_labels),
# while set_xticklabels works on both older and newer releases.
axes[1, 0].set_xticklabels(categories)
axes[1, 0].set_xlabel('Category')
axes[1, 0].set_ylabel('Value')
axes[1, 0].set_title('Value Distribution by Category')
axes[1, 0].grid(True, alpha=0.3)

# Bottom-right: correlation heatmap of the numerical columns
corr_matrix = df[['x', 'y', 'value']].corr()
n_vars = len(corr_matrix.columns)
# Fix the colour scale to [-1, 1] so colours are comparable across runs
im = axes[1, 1].imshow(corr_matrix, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)
axes[1, 1].set_xticks(range(n_vars))
axes[1, 1].set_yticks(range(n_vars))
axes[1, 1].set_xticklabels(corr_matrix.columns)
axes[1, 1].set_yticklabels(corr_matrix.columns)
axes[1, 1].set_title('Correlation Matrix')

# Annotate each heatmap cell with its correlation value.
# imshow puts matrix row i on the y axis and column j on the x axis,
# hence the (j, i) coordinate order.
for i in range(n_vars):
    for j in range(n_vars):
        axes[1, 1].text(j, i, f'{corr_matrix.iloc[i, j]:.2f}',
                        ha='center', va='center', color='black', fontweight='bold')

# Colour bar for the heatmap panel only
fig.colorbar(im, ax=axes[1, 1], shrink=0.8)

fig.tight_layout()
plt.show()

## Summary

This notebook demonstrates:
- Proper Apache 2.0 license attribution
- Code formatting following the project's standards
- Type hints usage
- Function documentation with Google docstring format
- Data analysis and visualization examples
- Use of assertions for input validation

The code follows the project's coding standards:
- snake_case for variables and functions
- PascalCase for classes
- Single quotes for strings
- F-strings for formatting
- Proper type hints
- Google-style docstrings with triple double quotes