# Solution: Advanced Querying and Performance

This notebook contains complete solutions for all the advanced querying exercises.


## Exercise 1: Setup and Model Creation


In [None]:
# Solution: Set up SQLAlchemy and create models
from sqlalchemy import create_engine, Column, Integer, String, DateTime, ForeignKey, Text, Boolean, Float, Index
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship, joinedload, subqueryload, selectinload
from sqlalchemy import func, and_, or_, not_, desc, asc
from datetime import datetime
import time

# Create database engine with echo for SQL logging
# The URL is a relative SQLite path, so the database file is created in the
# current working directory.  echo=True logs every emitted SQL statement;
# Exercise 4 toggles it through `engine.echo`.
engine = create_engine('sqlite:///advanced_querying_exercise.db', echo=True)
# Declarative base class that every model below inherits from.
Base = declarative_base()
# Session factory bound to the engine; call Session() to open a new session.
Session = sessionmaker(bind=engine)

# E-commerce domain models
class Customer(Base):
    """A shop customer, stored in the ``customers`` table.

    A customer owns a collection of orders; ``name`` is indexed and
    ``email`` must be unique.
    """

    __tablename__ = 'customers'

    # --- Columns ---------------------------------------------------------
    id = Column(Integer, primary_key=True)
    name = Column(String(100), index=True, nullable=False)
    email = Column(String(100), nullable=False, unique=True)
    phone = Column(String(20))
    # The callable (not its result) is passed, so it runs once per INSERT.
    created_at = Column(DateTime, default=datetime.utcnow)

    # --- Relationships ---------------------------------------------------
    # Orders follow their customer: deleting the customer, or removing an
    # order from this collection, deletes the order row as well.
    orders = relationship(
        "Order",
        cascade="all, delete-orphan",
        back_populates="customer",
    )

    def __repr__(self):
        return "<Customer(name='{}', email='{}')>".format(self.name, self.email)

class Product(Base):
    """A catalogue item, stored in the ``products`` table.

    Products appear on order lines and can receive reviews.  ``name``,
    ``price`` and ``category`` are each indexed for filtering.
    """

    __tablename__ = 'products'

    # --- Columns ---------------------------------------------------------
    id = Column(Integer, primary_key=True)
    name = Column(String(200), index=True, nullable=False)
    description = Column(Text)
    price = Column(Float, index=True, nullable=False)
    category = Column(String(50), index=True)
    stock_quantity = Column(Integer, default=0)
    is_active = Column(Boolean, default=True)
    # The callable (not its result) is passed, so it runs once per INSERT.
    created_at = Column(DateTime, default=datetime.utcnow)

    # --- Relationships ---------------------------------------------------
    # Both child collections are deleted together with the product.
    order_items = relationship(
        "OrderItem",
        cascade="all, delete-orphan",
        back_populates="product",
    )
    reviews = relationship(
        "Review",
        cascade="all, delete-orphan",
        back_populates="product",
    )

    def __repr__(self):
        return "<Product(name='{}', price={})>".format(self.name, self.price)

class Order(Base):
    """A customer order, stored in the ``orders`` table.

    Every order belongs to exactly one customer and owns its order lines.
    ``status`` defaults to ``'pending'``; ``status`` and ``order_date`` are
    indexed.
    """

    __tablename__ = 'orders'

    # --- Columns ---------------------------------------------------------
    id = Column(Integer, primary_key=True)
    order_number = Column(String(50), nullable=False, unique=True)
    total_amount = Column(Float, nullable=False)
    status = Column(String(20), index=True, default='pending')
    # The callable (not its result) is passed, so it runs once per INSERT.
    order_date = Column(DateTime, index=True, default=datetime.utcnow)

    # --- Foreign keys ----------------------------------------------------
    customer_id = Column(Integer, ForeignKey('customers.id'), nullable=False)

    # --- Relationships ---------------------------------------------------
    customer = relationship("Customer", back_populates="orders")
    # Order lines are deleted together with the order.
    order_items = relationship(
        "OrderItem",
        cascade="all, delete-orphan",
        back_populates="order",
    )

    def __repr__(self):
        return "<Order(order_number='{}', total={})>".format(
            self.order_number, self.total_amount
        )

class OrderItem(Base):
    """One line of an order, stored in the ``order_items`` table.

    Links an order to a product and records the quantity together with the
    unit and total price stored for that line.
    """

    __tablename__ = 'order_items'

    # --- Columns ---------------------------------------------------------
    id = Column(Integer, primary_key=True)
    quantity = Column(Integer, nullable=False)
    unit_price = Column(Float, nullable=False)
    total_price = Column(Float, nullable=False)

    # --- Foreign keys ----------------------------------------------------
    order_id = Column(Integer, ForeignKey('orders.id'), nullable=False)
    product_id = Column(Integer, ForeignKey('products.id'), nullable=False)

    # --- Relationships ---------------------------------------------------
    order = relationship("Order", back_populates="order_items")
    product = relationship("Product", back_populates="order_items")

    def __repr__(self):
        return "<OrderItem(product_id={}, quantity={})>".format(
            self.product_id, self.quantity
        )

class Review(Base):
    """A customer's rating of a product, stored in the ``reviews`` table."""

    __tablename__ = 'reviews'

    # --- Columns ---------------------------------------------------------
    id = Column(Integer, primary_key=True)
    # Intended range is 1-5 stars; the column itself does not enforce it.
    rating = Column(Integer, nullable=False)
    comment = Column(Text)
    # The callable (not its result) is passed, so it runs once per INSERT.
    created_at = Column(DateTime, default=datetime.utcnow)

    # --- Foreign keys ----------------------------------------------------
    product_id = Column(Integer, ForeignKey('products.id'), nullable=False)
    customer_id = Column(Integer, ForeignKey('customers.id'), nullable=False)

    # --- Relationships ---------------------------------------------------
    product = relationship("Product", back_populates="reviews")
    # One-directional: Customer defines no matching collection of reviews.
    customer = relationship("Customer")

    def __repr__(self):
        return "<Review(rating={}, product_id={})>".format(
            self.rating, self.product_id
        )

# Create indexes for performance
# Composite indexes on column pairs.  Each Index is built from mapped columns,
# so it attaches itself to the owning table and is emitted by create_all()
# below; the return values do not need to be kept.
Index('idx_orders_customer_date', Order.customer_id, Order.order_date)
Index('idx_order_items_order_product', OrderItem.order_id, OrderItem.product_id)
Index('idx_reviews_product_rating', Review.product_id, Review.rating)

# Create tables
# Emits CREATE TABLE / CREATE INDEX for everything registered on Base.
# NOTE(review): create_all() skips tables that already exist, so schema
# changes are not applied to an existing database file - confirm against the
# SQLAlchemy version in use.
Base.metadata.create_all(engine)

print("✅ Models created successfully!")
print("Models: Customer, Product, Order, OrderItem, Review")
print("Indexes created for performance optimization")


In [None]:
# Solution: Create sample data
session = Session()

# The SQLite file outlives the notebook kernel.  Re-running this cell against
# an already-seeded database would violate the unique constraints on
# customers.email and orders.order_number, so rows are inserted only when the
# customers table is still empty.
if session.query(Customer).first() is None:
    # Create customers
    customers_data = [
        Customer(name="Alice Johnson", email="alice@example.com", phone="555-0101"),
        Customer(name="Bob Smith", email="bob@example.com", phone="555-0102"),
        Customer(name="Carol Davis", email="carol@example.com", phone="555-0103"),
        Customer(name="David Wilson", email="david@example.com", phone="555-0104"),
        Customer(name="Eve Brown", email="eve@example.com", phone="555-0105"),
    ]
    alice, bob, carol, david, _ = customers_data

    # Create products
    products_data = [
        Product(name="Laptop Pro", description="High-performance laptop", price=1299.99, category="Electronics", stock_quantity=50),
        Product(name="Wireless Mouse", description="Ergonomic wireless mouse", price=29.99, category="Electronics", stock_quantity=200),
        Product(name="Coffee Maker", description="Automatic coffee maker", price=89.99, category="Appliances", stock_quantity=30),
        Product(name="Desk Chair", description="Ergonomic office chair", price=199.99, category="Furniture", stock_quantity=25),
        Product(name="Notebook Set", description="Premium notebook set", price=19.99, category="Stationery", stock_quantity=100),
        Product(name="Bluetooth Speaker", description="Portable Bluetooth speaker", price=79.99, category="Electronics", stock_quantity=75),
    ]
    laptop, mouse, coffee_maker, chair, notebooks, speaker = products_data

    # Create orders
    # Rows are linked through relationships instead of literal ids, so the
    # data stays consistent whatever primary keys the database assigns.
    orders_data = [
        Order(order_number="ORD-001", total_amount=1329.98, status="completed", customer=alice),
        Order(order_number="ORD-002", total_amount=109.98, status="pending", customer=bob),
        Order(order_number="ORD-003", total_amount=219.98, status="completed", customer=alice),
        Order(order_number="ORD-004", total_amount=89.99, status="shipped", customer=carol),
        Order(order_number="ORD-005", total_amount=199.99, status="completed", customer=david),
    ]
    ord_001, ord_002, ord_003, ord_004, ord_005 = orders_data

    # Create order items
    order_items_data = [
        OrderItem(order=ord_001, product=laptop, quantity=1, unit_price=1299.99, total_price=1299.99),
        OrderItem(order=ord_001, product=mouse, quantity=1, unit_price=29.99, total_price=29.99),
        OrderItem(order=ord_002, product=coffee_maker, quantity=1, unit_price=89.99, total_price=89.99),
        OrderItem(order=ord_002, product=notebooks, quantity=1, unit_price=19.99, total_price=19.99),
        OrderItem(order=ord_003, product=chair, quantity=1, unit_price=199.99, total_price=199.99),
        OrderItem(order=ord_003, product=notebooks, quantity=1, unit_price=19.99, total_price=19.99),
        OrderItem(order=ord_004, product=coffee_maker, quantity=1, unit_price=89.99, total_price=89.99),
        OrderItem(order=ord_005, product=chair, quantity=1, unit_price=199.99, total_price=199.99),
    ]

    # Create reviews
    reviews_data = [
        Review(rating=5, comment="Excellent laptop!", product=laptop, customer=alice),
        Review(rating=4, comment="Good mouse, comfortable to use", product=mouse, customer=alice),
        Review(rating=5, comment="Great coffee maker", product=coffee_maker, customer=bob),
        Review(rating=3, comment="Decent chair, could be better", product=chair, customer=alice),
        Review(rating=4, comment="Nice notebooks", product=notebooks, customer=bob),
        Review(rating=5, comment="Amazing sound quality", product=speaker, customer=carol),
        Review(rating=4, comment="Good value for money", product=laptop, customer=david),
    ]

    # One transaction for the whole data set: either everything is stored or
    # nothing is, and a failure leaves the session usable for the next cell.
    try:
        session.add_all(customers_data)
        session.add_all(products_data)
        session.add_all(orders_data)
        session.add_all(order_items_data)
        session.add_all(reviews_data)
        session.commit()
    except Exception:
        session.rollback()
        raise

    print("✅ Sample data created successfully!")
else:
    # Already seeded: load the existing rows so the names below stay defined.
    customers_data = session.query(Customer).order_by(Customer.id).all()
    products_data = session.query(Product).order_by(Product.id).all()
    orders_data = session.query(Order).order_by(Order.id).all()
    order_items_data = session.query(OrderItem).order_by(OrderItem.id).all()
    reviews_data = session.query(Review).order_by(Review.id).all()

    print("ℹ️ Sample data already present - skipping inserts")

print(f"Customers: {len(customers_data)}")
print(f"Products: {len(products_data)}")
print(f"Orders: {len(orders_data)}")
print(f"Order Items: {len(order_items_data)}")
print(f"Reviews: {len(reviews_data)}")


## Exercise 2: Advanced Joins and Complex Queries


In [None]:
# Solution: Advanced joins and complex queries
print("=== Advanced Joins and Complex Queries ===")

# --- 1. Four-table join: one result row per order line ----------------------
print("\n1. Complete order details with all related information:")
complete_orders = (
    session.query(
        Order.order_number,
        Customer.name.label('customer_name'),
        Product.name.label('product_name'),
        OrderItem.quantity,
        OrderItem.unit_price,
        OrderItem.total_price,
        Order.status,
    )
    .join(Customer)
    .join(OrderItem)
    .join(Product)
    .all()
)

for number, buyer, item_name, count, price_each, line_total, state in complete_orders:
    print(f"  {number}: {buyer} - {item_name} (x{count}) @ ${price_each} = ${line_total} [{state}]")

# --- 2. LEFT OUTER JOIN keeps products that have no reviews -----------------
print("\n2. All products with review count (including products with no reviews):")
from sqlalchemy.orm import outerjoin

products_with_reviews = (
    session.query(
        Product.name,
        Product.category,
        func.count(Review.id).label('review_count'),
        func.avg(Review.rating).label('avg_rating'),
    )
    .outerjoin(Review)
    .group_by(Product.id, Product.name, Product.category)
    .all()
)

for prod_name, prod_category, n_reviews, mean_rating in products_with_reviews:
    # AVG over zero rows is NULL, which arrives here as a falsy value.
    rating_text = f"avg: {mean_rating:.1f}★" if mean_rating else "no rating"
    print(f"  {prod_name} ({prod_category}) - {n_reviews} reviews, {rating_text}")

# --- 3. Per-customer aggregates ---------------------------------------------
print("\n3. Customer order statistics:")
customer_stats = (
    session.query(
        Customer.name,
        func.count(Order.id).label('order_count'),
        func.sum(Order.total_amount).label('total_spent'),
        func.avg(Order.total_amount).label('avg_order_value'),
    )
    .join(Order)
    .group_by(Customer.id, Customer.name)
    .all()
)

for buyer, n_orders, spent, mean_order in customer_stats:
    print(f"  {buyer}: {n_orders} orders, ${spent:.2f} total, ${mean_order:.2f} avg")

# --- 4. Sales figures restricted to products priced above $100 --------------
print("\n4. High-value products (price > $100) with sales data:")
high_value_products = (
    session.query(
        Product.name,
        Product.price,
        func.count(OrderItem.id).label('times_ordered'),
        func.sum(OrderItem.quantity).label('total_quantity_sold'),
        func.sum(OrderItem.total_price).label('total_revenue'),
    )
    .join(OrderItem)
    .filter(Product.price > 100)
    .group_by(Product.id, Product.name, Product.price)
    .all()
)

for prod_name, list_price, n_lines, units, income in high_value_products:
    print(f"  {prod_name} (${list_price}): {n_lines} orders, {units} units, ${income:.2f} revenue")

# --- 5. Products bought by more than one distinct customer ------------------
# Joins products -> order lines -> orders and counts distinct customer ids per
# product; HAVING keeps only products with at least two different buyers.
print("\n5. Products ordered by multiple customers:")
distinct_buyers = func.count(func.distinct(Order.customer_id))
popular_products = (
    session.query(Product.name, distinct_buyers.label('unique_customers'))
    .join(OrderItem)
    .join(Order)
    .group_by(Product.id, Product.name)
    .having(distinct_buyers > 1)
    .all()
)

for prod_name, n_buyers in popular_products:
    print(f"  {prod_name}: ordered by {n_buyers} different customers")


## Exercise 3: Subqueries and Advanced Filtering


In [None]:
# Solution: Subqueries and advanced filtering
print("=== Subqueries and Advanced Filtering ===")

# 1. Subquery to find products with above-average ratings
print("\n1. Products with above-average ratings:")
# Despite its name this is a plain Python number: .scalar() executes the query
# immediately and returns the average rating over all reviews.
avg_rating_subquery = session.query(func.avg(Review.rating)).scalar()

above_avg_products = session.query(Product).join(Review).group_by(Product.id).having(
    func.avg(Review.rating) > avg_rating_subquery
).all()

print(f"Average rating across all products: {avg_rating_subquery:.2f}")
for product in above_avg_products:
    avg_rating = session.query(func.avg(Review.rating)).filter(Review.product_id == product.id).scalar()
    print(f"  {product.name} - {avg_rating:.1f}★")

# 2. EXISTS subquery to find customers who have placed orders
print("\n2. Customers who have placed orders (using EXISTS):")
customers_with_orders = session.query(Customer).filter(
    session.query(Order).filter(Order.customer_id == Customer.id).exists()
).all()

for customer in customers_with_orders:
    print(f"  {customer.name}")

# 3. IN subquery to find products in specific categories
print("\n3. Products in Electronics or Appliances categories:")
electronics_appliances = session.query(Product.id).filter(
    Product.category.in_(['Electronics', 'Appliances'])
).subquery()

products_in_categories = session.query(Product).filter(
    Product.id.in_(session.query(electronics_appliances.c.id))
).all()

for product in products_in_categories:
    print(f"  {product.name} ({product.category})")

# 4. Correlated subquery to find customers with more than 1 order
print("\n4. Customers with more than 1 order:")
# The per-customer count must stay a SQL expression so the comparison is
# rendered inside the WHERE clause.  Calling .scalar() here would execute the
# count right away and filter on a constant Python bool, returning every
# customer regardless of how many orders they have.
orders_per_customer = (
    session.query(func.count(Order.id))
    .filter(Order.customer_id == Customer.id)
    .correlate(Customer)  # refer to the outer query's customers row
    .scalar_subquery()  # SQLAlchemy 1.4+; older releases spell it .as_scalar()
)
customers_multiple_orders = session.query(Customer).filter(
    orders_per_customer > 1
).all()

for customer in customers_multiple_orders:
    order_count = session.query(func.count(Order.id)).filter(Order.customer_id == customer.id).scalar()
    print(f"  {customer.name} ({order_count} orders)")

# 5. Complex filtering with multiple conditions
print("\n5. High-value orders (>$200) with 4+ star average product rating:")
# The inner joins drop orders whose products have no reviews at all; the
# average is taken over every review of every product on the order.
complex_filter_orders = session.query(Order).join(OrderItem).join(Product).join(Review).filter(
    and_(
        Order.total_amount > 200,
        Order.status == 'completed'
    )
).group_by(Order.id).having(func.avg(Review.rating) >= 4.0).all()

for order in complex_filter_orders:
    avg_rating = session.query(func.avg(Review.rating)).join(Product).join(OrderItem).filter(
        OrderItem.order_id == order.id
    ).scalar()
    print(f"  {order.order_number}: ${order.total_amount} (avg rating: {avg_rating:.1f}★)")

# 6. Window function example with subquery
print("\n6. Top 3 products by revenue in each category:")
# The query below uses the .over() method on the function expression; this
# import is kept so the standalone `over` construct stays available.
from sqlalchemy import over

# ROW_NUMBER() ranks products inside each category by summed revenue.  The
# ranking is wrapped in a subquery because a window result cannot be filtered
# in the WHERE clause of the same SELECT.
revenue_ranking = session.query(
    Product.name,
    Product.category,
    func.sum(OrderItem.total_price).label('total_revenue'),
    func.row_number().over(
        partition_by=Product.category,
        order_by=func.sum(OrderItem.total_price).desc()
    ).label('revenue_rank')
).join(OrderItem).group_by(Product.id, Product.name, Product.category).subquery()

top_products = session.query(revenue_ranking).filter(
    revenue_ranking.c.revenue_rank <= 3
).all()

for name, category, revenue, rank in top_products:
    print(f"  #{rank} {name} ({category}) - ${revenue:.2f}")


## Exercise 4: Eager Loading and Performance Optimization


In [None]:
# Solution: Eager loading and performance optimization
print("=== Eager Loading and Performance Optimization ===")

from sqlalchemy import event

# Statements sent to the database while a measurement is running.  Counting
# them keeps the printed query numbers honest instead of assumed.
executed_statements = []


def _record_statement(conn, cursor, statement, parameters, context, executemany):
    """Engine hook: remember every statement right before it is executed."""
    executed_statements.append(statement)


event.listen(engine, "before_cursor_execute", _record_statement)

# 1. Demonstrate N+1 problem
print("\n1. N+1 Problem Example (BAD - Multiple Queries):")
print("Loading orders and then accessing customer for each order...")

# Turn off echo to see the difference
engine.echo = False

# Objects loaded by earlier cells are still cached in the session.  A lazy
# load that finds its target there issues no SQL, which would hide the very
# problem being demonstrated - so start from an empty session.
session.expunge_all()
executed_statements.clear()

# perf_counter() is the clock meant for measuring short intervals.
start_time = time.perf_counter()
orders = session.query(Order).all()  # 1 query
for order in orders:
    # Lazy load: one extra query for each customer not yet in the session
    print(f"  {order.order_number} by {order.customer.name}")
n_plus_1_time = time.perf_counter() - start_time
n_plus_1_queries = len(executed_statements)

print(f"Time taken: {n_plus_1_time:.4f} seconds")
print(f"Queries executed: {n_plus_1_queries} (1 for the orders + {n_plus_1_queries - 1} lazy customer loads)")

# 2. Fix with joinedload
print("\n2. Solution 1: joinedload (Single Query with JOIN):")
session.expunge_all()  # same cold start as the measurement above
executed_statements.clear()

start_time = time.perf_counter()
orders_with_customers = session.query(Order).options(joinedload(Order.customer)).all()  # 1 query
for order in orders_with_customers:
    print(f"  {order.order_number} by {order.customer.name}")
joinedload_time = time.perf_counter() - start_time
joinedload_queries = len(executed_statements)

print(f"Time taken: {joinedload_time:.4f} seconds")
print(f"Queries executed: {joinedload_queries} (with JOIN)")

# The counter is only needed for the two measurements above.
event.remove(engine, "before_cursor_execute", _record_statement)

# 3. Performance comparison function
def performance_comparison():
    """Time lazy loading against joined eager loading for three orders.

    Each phase loads three orders and then touches every order's customer and
    every order line's product.  Both phases run in their own fresh session,
    so neither benefits from objects cached by earlier queries, and both
    timings include the initial order query.  Results are printed; nothing is
    returned.
    """
    print("\n3. Performance Comparison:")

    def _walk_order_graph(loaded_orders):
        # Touch every related attribute so lazy relationships actually load.
        for order in loaded_orders:
            order.customer.name
            for item in order.order_items:
                item.product.name

    # N+1 problem: relationships are loaded lazily, one query at a time
    lazy_session = Session()
    try:
        start_time = time.perf_counter()
        _walk_order_graph(lazy_session.query(Order).limit(3).all())
        n_plus_1_time = time.perf_counter() - start_time
    finally:
        lazy_session.close()

    # Optimized with eager loading: related rows arrive with the orders
    eager_session = Session()
    try:
        start_time = time.perf_counter()
        optimized_orders = eager_session.query(Order).options(
            joinedload(Order.customer),
            joinedload(Order.order_items).joinedload(OrderItem.product)
        ).limit(3).all()
        _walk_order_graph(optimized_orders)
        optimized_time = time.perf_counter() - start_time
    finally:
        eager_session.close()

    print(f"  N+1 Problem: {n_plus_1_time:.4f}s")
    print(f"  Optimized:   {optimized_time:.4f}s")
    if optimized_time > 0:
        print(f"  Improvement: {n_plus_1_time/optimized_time:.1f}x faster")
    else:
        # Guard the ratio: a zero reading must not crash the demo.
        print("  Improvement: n/a (optimized run too fast to measure)")

performance_comparison()

# 4. Query result caching
print("\n4. Query Result Caching:")
cache = {}

def cached_query(query_key, query_func):
    """Return the result stored under ``query_key``, computing it on first use.

    On a miss ``query_func`` is called without arguments and its result is
    stored in the module-level ``cache`` dict; on a hit the stored object is
    returned as is.  A hit/miss line is printed either way.
    """
    try:
        result = cache[query_key]
    except KeyError:
        print(f"  Cache miss for '{query_key}' - executing query")
        result = cache[query_key] = query_func()
    else:
        print(f"  Cache hit for '{query_key}'")
    return result

# First call - cache miss
# perf_counter() is used instead of time.time(): the cache hit below takes
# only microseconds, which a coarse wall clock can report as exactly 0.0.
start_time = time.perf_counter()
expensive_products = cached_query("expensive_products",
    lambda: session.query(Product).filter(Product.price > 100).all())
first_call_time = time.perf_counter() - start_time

# Second call - cache hit
# The lambda is not invoked this time; the list stored by the first call
# (the very same ORM objects) is returned.
start_time = time.perf_counter()
expensive_products_cached = cached_query("expensive_products",
    lambda: session.query(Product).filter(Product.price > 100).all())
second_call_time = time.perf_counter() - start_time

print(f"  First call: {first_call_time:.4f}s")
print(f"  Cached call: {second_call_time:.4f}s")

# 5. Pagination function
print("\n5. Pagination Example:")
def paginate_products(page=1, per_page=3):
    """Return one page of products plus paging totals.

    Args:
        page: 1-based page number.
        per_page: maximum number of products on a page.

    Returns:
        A ``(products, total_count, total_pages)`` tuple: the products on the
        requested page, the number of products overall, and the number of
        pages needed to show them all.

    Raises:
        ValueError: if ``page`` or ``per_page`` is smaller than 1.
    """
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page}")
    if per_page < 1:
        raise ValueError(f"per_page must be >= 1, got {per_page}")

    offset = (page - 1) * per_page
    # OFFSET/LIMIT only yields stable, non-overlapping pages when the rows
    # have a defined order, so sort by the primary key.
    products = (
        session.query(Product)
        .order_by(Product.id)
        .offset(offset)
        .limit(per_page)
        .all()
    )
    total_count = session.query(Product).count()
    # Ceiling division without floats
    total_pages = (total_count + per_page - 1) // per_page

    return products, total_count, total_pages

# Walk the pages that actually exist instead of a fixed page range, which
# would print empty pages beyond the last one (e.g. "Page 3/2").
page = 1
while True:
    products, total, pages = paginate_products(page, 3)
    print(f"  Page {page}/{pages} ({len(products)} products):")
    for product in products:
        print(f"    {product.name} - ${product.price}")
    if page >= pages:
        break
    page += 1

# 6. Performance metrics summary
print("\n6. Performance Metrics Summary:")
print(f"  N+1 Problem: {n_plus_1_time:.4f}s")
print(f"  joinedload:   {joinedload_time:.4f}s")
print(f"  Cache miss:   {first_call_time:.4f}s")
print(f"  Cache hit:    {second_call_time:.4f}s")
# A cache hit can be faster than the timer's resolution and be measured as
# 0.0; guard the ratio so the summary never dies with ZeroDivisionError.
if second_call_time > 0:
    print(f"  Cache improvement: {first_call_time/second_call_time:.1f}x faster")
else:
    print("  Cache improvement: n/a (cache hit too fast to measure)")

# Turn echo back on
engine.echo = True

# Close the session
session.close()

print("\n" + "="*50)
print("Advanced Querying and Performance examples completed!")
print("="*50)
