# E-Commerce Dataset Tutorial

This tutorial demonstrates how to generate and explore a synthetic e-commerce dataset using `pandas` and `matplotlib`.

In [None]:
from generate_ecommerce_dataset import generate_ecommerce_dataset

# Build the three tables (orders, order_items, products) in one call.
# The fixed seed keeps the generated data, and therefore every chart below, reproducible.
orders, order_items, products = generate_ecommerce_dataset(seed=123)

# Preview the first rows of `orders`; as the cell's last expression it is what the notebook displays.
orders.head()

In [None]:
# Preview the first rows of `order_items` (displayed as the cell output).
order_items.head()

In [None]:
# Preview the first rows of `products` (displayed as the cell output).
products.head()

## Most Common Product Categories

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Tally how many products belong to each category.
category_counts = products['ProductCategory'].value_counts()

# Draw the tally as a bar chart and label it through the returned Axes.
ax = category_counts.plot.bar(title='Number of Products per Category')
ax.set_xlabel('Category')
ax.set_ylabel('Number of Products')
plt.tight_layout()
plt.show()

## Top Revenue-Generating Products

In [None]:
# Revenue per product: sum PurchaseAmount over all order lines sharing a ProductID,
# then keep the ten largest totals in descending order.
product_revenue = order_items.groupby('ProductID')['PurchaseAmount'].sum()
top_products = product_revenue.sort_values(ascending=False).head(10)

# Attach the product names to those ten totals.
top_products_df = top_products.reset_index().merge(
    products[['ProductID', 'ProductName']],
    on='ProductID',
)

fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(top_products_df['ProductName'], top_products_df['PurchaseAmount'])
ax.set_title('Top 10 Revenue-Generating Products')
ax.set_xlabel('Revenue')
# barh places the first row at the bottom; flip the axis so the top earner is listed first.
ax.invert_yaxis()
fig.tight_layout()
plt.show()

## Distribution of Orders by Payment Method

In [None]:
# Share of orders per payment method, drawn as a pie chart with percentage labels.
payment_counts = orders['PaymentMethod'].value_counts()
ax = payment_counts.plot.pie(autopct='%1.1f%%', title='Payment Methods')
# Clear the y-axis label that pandas would otherwise print beside the pie.
ax.set_ylabel('')
plt.show()