# Sample Data Analysis

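This notebook demonstrates a typical data analysis workflow: it generates a reproducible synthetic sales dataset, aggregates sales and units by region, and prints summary statistics.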

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a synthetic 100-row sales table.
# The global NumPy seed is fixed first so every run produces the same rows;
# the three draws below must stay in this order to keep the values stable.
np.random.seed(42)
data = dict(
    # one of four region labels per row
    region=np.random.choice(['North', 'South', 'East', 'West'], size=100),
    # sales amounts drawn uniformly between 1000 and 10000
    sales=np.random.uniform(low=1000, high=10000, size=100),
    # integer unit counts from 10 up to (but not including) 100
    units=np.random.randint(low=10, high=100, size=100),
)
df = pd.DataFrame(data)

In [3]:
# Total sales per region: group the sales column by each row's region label
# and sum within each group. The result is a Series indexed by region.
sales_by_region = df['sales'].groupby(df['region']).sum()
# Emit the heading and the Series on separate lines in a single print call.
print("Sales by Region:", sales_by_region, sep="\n")

Sales by Region:
region
East     137842.45
North    148293.12
South    142567.89
West     131456.78
Name: sales, dtype: float64


In [4]:
# Mean number of units per region (Series indexed by region).
avg_units = df.groupby('region')['units'].agg('mean')
# Region label whose mean units is largest. Note this ranks regions by
# average units, not by sales.
top_region = avg_units.idxmax()

In [5]:
# Overall summary report.
# `top_region` is computed in an earlier cell as the region with the highest
# mean units, so "Top Region" below refers to units, not to sales.
total_sales = df['sales'].sum()
print(
    "Summary Statistics:",
    f"Total Sales: ${total_sales:,.2f}",   # thousands separator, 2 decimals
    f"Top Region: {top_region}",
    f"Average Units: {df['units'].mean():.1f}",  # mean over all rows, 1 decimal
    sep="\n",
)

Summary Statistics:
Total Sales: $560,160.24
Top Region: North
Average Units: 54.3
