In [None]:
# Load Redis Workshop Styling
# `display` is imported explicitly so the cell does not rely on the notebook
# front end injecting it into the namespace.
from IPython.display import HTML, display

css_path = '../../../shared/notebook-styles.css'
try:
    # Decode as UTF-8 explicitly; the platform default encoding (e.g. cp1252 on
    # Windows) can mis-decode or reject a stylesheet with non-ASCII characters.
    with open(css_path, 'r', encoding='utf-8') as f:
        css = f.read()
except FileNotFoundError:
    # The stylesheet is optional: fall back to the default notebook look.
    print('‚ÑπÔ∏è  Using default notebook styling')
else:
    # Only the file read is guarded; rendering happens once the CSS is in hand.
    display(HTML(f'<style>{css}</style>'))
    print('‚úÖ Workshop styling loaded!')

# Module 6: Performance Efficiency & Data Modeling

## 🎯 Interactive Lab: Redis Data Structures & Optimization

**Duration:** 60 minutes  
**Level:** Intermediate  

Master Redis data structures and performance optimization:
- 📊 **Strings, Hashes, Lists, Sets, Sorted Sets**
- ⚡ **Performance benchmarking**
- 🎯 **Choosing the right data structure**
- 🔧 **Memory optimization**

---


## 🐳 Start Docker Redis Container

Before we begin, let's start a Redis container using Docker:

In [2]:
# Start Redis container
!docker run -d --name workshop-redis-module6 -p 6379:6379 redis:7-alpine

# Wait for Redis to be ready
import time
time.sleep(5)

# Test connection
!docker exec workshop-redis-module6 redis-cli ping

print('‚úÖ Redis container is running on localhost:6379')

docker: Error response from daemon: Conflict. The container name "/workshop-redis-module6" is already in use by container "9a9328363544484ecd2c30a0edf711b69d0f812722cb8ec5ec664f3fc464d2b3". You have to remove (or rename) that container to be able to reuse that name.

Run 'docker run --help' for more information
Error response from daemon: container 9a9328363544484ecd2c30a0edf711b69d0f812722cb8ec5ec664f3fc464d2b3 is not running
‚úÖ Redis container is running on localhost:6379


## Part 1: Setup


In [3]:
!pip install -q redis pandas matplotlib

import redis
import time
import random
import statistics
import pandas as pd
import matplotlib.pyplot as plt

# Connect to Redis
r = redis.Redis(host='localhost', port=6379, decode_responses=True)
r.ping()
print('‚úÖ Connected to Redis')


‚úÖ Connected to Redis


---

## Part 2: Redis Data Structures Overview

### Available Data Types

| Type | Use Case | Example |
|------|----------|----------|
| **String** | Cache values, counters | Session data, API responses |
| **Hash** | Objects with fields | User profiles, product details |
| **List** | Ordered collections | Activity feeds, queues |
| **Set** | Unique items | Tags, followers |
| **Sorted Set** | Ranked items | Leaderboards, time-series |


### 1. Strings


In [4]:
# Strings: one value per key, used here for a plain text value and a counter
r.set('user:1001:name', 'Alice Johnson')
r.set('page:home:views', 0)

# Record two page views, one INCR call per view
for _view in range(2):
    r.incr('page:home:views')

# Read both values back (name first, then the counter)
name, views = r.get('user:1001:name'), r.get('page:home:views')

print('‚úÖ String Operations:')
print(f'   Name: {name}')
print(f'   Views: {views}')


‚úÖ String Operations:
   Name: Alice Johnson
   Views: 2


### 2. Hashes - Store Objects


In [5]:
# Hashes: field/value pairs stored under a single key (a natural fit for objects)
user = dict(
    id='1001',
    name='Alice Johnson',
    email='alice@example.com',
    age='30',
    country='USA',
)

# Write every field of the object in one HSET call
r.hset('user:1001', mapping=user)

# Read back at three granularities: one field, a chosen subset, everything
email = r.hget('user:1001', 'email')
fields = r.hmget('user:1001', ['name', 'age'])
user_data = r.hgetall('user:1001')

print('‚úÖ Hash Operations:')
for label, value in (('Email', email), ('Name, Age', fields), ('Full user', user_data)):
    print(f'   {label}: {value}')


‚úÖ Hash Operations:
   Email: alice@example.com
   Name, Age: ['Alice Johnson', '30']
   Full user: {'id': '1001', 'name': 'Alice Johnson', 'email': 'alice@example.com', 'age': '30', 'country': 'USA'}


### 3. Lists - Ordered Collections


In [6]:
# List operations (activity feed)
activities = [
    'User logged in',
    'Viewed product P001',
    'Added to cart',
    'Completed checkout'
]

# Start from an empty list so re-running this cell does not append the same
# activities again (RPUSH always adds, it never replaces)
r.delete('user:1001:activity')

# Append all activities to the tail in a single call; insertion order is kept,
# so the oldest entry sits at index 0
r.rpush('user:1001:activity', *activities)

# Get list length
length = r.llen('user:1001:activity')

# Get all items (0 .. -1 spans the whole list)
all_activities = r.lrange('user:1001:activity', 0, -1)

# Get recent 2 activities (negative indexes count from the tail)
recent = r.lrange('user:1001:activity', -2, -1)

print(f'‚úÖ List Operations:')
print(f'   Total activities: {length}')
print(f'   Recent: {recent}')


‚úÖ List Operations:
   Total activities: 4
   Recent: ['Added to cart', 'Completed checkout']


### 4. Sets - Unique Items


In [7]:
# Sets: unordered collections of unique members (tags, interests)
interests_by_key = {
    'user:1001:interests': ('technology', 'databases', 'cloud'),
    'user:1002:interests': ('databases', 'ai', 'machine-learning'),
}
for interests_key, members in interests_by_key.items():
    r.sadd(interests_key, *members)

# All members of one set
user1_interests = r.smembers('user:1001:interests')

# Membership test for a single value
has_tech = r.sismember('user:1001:interests', 'technology')

# Members present in both sets (shared interests)
common = r.sinter('user:1001:interests', 'user:1002:interests')

print('‚úÖ Set Operations:')
print(f'   User 1 interests: {user1_interests}')
print(f'   Has technology: {has_tech}')
print(f'   Common interests: {common}')


‚úÖ Set Operations:
   User 1 interests: {'cloud', 'technology', 'databases'}
   Has technology: 1
   Common interests: {'databases'}


### 5. Sorted Sets - Ranked Items


In [8]:
# Sorted set operations (leaderboard)
scores = {
    'alice': 1500,
    'bob': 2000,
    'charlie': 1800,
    'diana': 2200,
    'eve': 1600
}

# Add every player in one ZADD call: zadd takes a {member: score} mapping, so
# there is no need for one network round trip per player
r.zadd('leaderboard:global', scores)

# Get top 3 players (highest score first)
top3 = r.zrevrange('leaderboard:global', 0, 2, withscores=True)

# Get player rank (0-based, counted from the highest score)
alice_rank = r.zrevrank('leaderboard:global', 'alice')

# Get player score
alice_score = r.zscore('leaderboard:global', 'alice')

print(f'‚úÖ Sorted Set Operations:')
print(f'   Top 3 Players:')
for i, (player, score) in enumerate(top3, 1):
    print(f'     {i}. {player}: {int(score)} points')
# +1 turns the 0-based rank into a human-friendly position
print(f'   Alice rank: #{alice_rank + 1}')
print(f'   Alice score: {alice_score}')


‚úÖ Sorted Set Operations:
   Top 3 Players:
     1. diana: 2200 points
     2. bob: 2000 points
     3. charlie: 1800 points
   Alice rank: #5
   Alice score: 1500.0


---

## Part 3: Performance Benchmarking

Let's compare performance across data structures:


In [9]:
def benchmark(name, operation, iterations=1000):
    """Time repeated calls of ``operation`` and summarise the latencies.

    Args:
        name: Label copied into the result under the ``'name'`` key.
        operation: Zero-argument callable to time; its return value is ignored.
        iterations: Number of timed calls (must be at least 1).

    Returns:
        dict with keys ``'name'``, ``'avg'``, ``'median'``, ``'p95'`` and
        ``'p99'``. All timings are in milliseconds. ``'p95'`` / ``'p99'`` are
        the sorted samples at index ``int(iterations * 0.95)`` /
        ``int(iterations * 0.99)``.

    Raises:
        statistics.StatisticsError: If ``iterations`` is less than 1, because
            there would be no samples to summarise.
    """
    if iterations < 1:
        # Same exception type an empty sample list produced before, but raised
        # up front with a message that names the actual problem.
        raise statistics.StatisticsError('benchmark requires iterations >= 1')

    times = []
    for _ in range(iterations):
        start = time.perf_counter()
        operation()
        # perf_counter() is in seconds; store milliseconds
        times.append((time.perf_counter() - start) * 1000)

    # Sort once and reuse the result for the median and both percentiles
    ordered = sorted(times)

    return {
        'name': name,
        'avg': statistics.mean(times),
        'median': statistics.median(ordered),
        'p95': ordered[int(iterations * 0.95)],
        'p99': ordered[int(iterations * 0.99)]
    }

# Run each micro-benchmark in a fixed order and collect the summaries
results = [benchmark('String SET', lambda: r.set('bench:str', 'value'))]

# Write the key once more before timing reads of it
r.set('bench:str', 'value')

remaining_cases = (
    ('String GET', lambda: r.get('bench:str')),
    ('Hash HSET', lambda: r.hset('bench:hash', 'field', 'value')),
    ('List RPUSH', lambda: r.rpush('bench:list', 'item')),
    ('Set SADD', lambda: r.sadd('bench:set', 'member')),
    ('ZSet ZADD', lambda: r.zadd('bench:zset', {'member': 1.0})),
)
for case_name, case_op in remaining_cases:
    results.append(benchmark(case_name, case_op))

# Report: one header row, a rule, then one row per benchmark
metric_keys = ('avg', 'median', 'p95', 'p99')
header_cells = [f'{"Operation":<15}'] + [f'{title:<8}' for title in ('Avg', 'Median', 'P95', 'P99')]

print('‚ö° Performance Results (1000 iterations):')
print()
print(' | '.join(header_cells))
print('-' * 65)
for row in results:
    row_cells = [f'{row["name"]:<15}'] + [f'{row[metric]:>6.3f}ms' for metric in metric_keys]
    print(' | '.join(row_cells))


‚ö° Performance Results (1000 iterations):

Operation       | Avg      | Median   | P95      | P99     
-----------------------------------------------------------------
String SET      |  0.107ms |  0.080ms |  0.204ms |  0.813ms
String GET      |  0.064ms |  0.061ms |  0.076ms |  0.108ms
Hash HSET       |  0.062ms |  0.061ms |  0.071ms |  0.082ms
List RPUSH      |  0.064ms |  0.059ms |  0.075ms |  0.155ms
Set SADD        |  0.061ms |  0.060ms |  0.070ms |  0.088ms
ZSet ZADD       |  0.065ms |  0.064ms |  0.075ms |  0.080ms


---

## Part 4: Choosing the Right Data Structure

### Decision Guide

```
Need to store a simple value?
└─> Use STRING

Need to store an object with multiple fields?
└─> Use HASH

Need ordered collection (queue, feed)?
└─> Use LIST

Need unique items (tags, followers)?
├─> No ordering needed? Use SET
└─> Need ranking/scoring? Use SORTED SET

Need an approximate count of unique items?
└─> Use HYPERLOGLOG

Need probabilistic membership checks ("have I probably seen this before?")?
└─> Use BLOOM FILTER (not in core Redis 7; requires the RedisBloom module / Redis Stack)
```


---

## Part 5: Memory Optimization

Let's analyze memory usage:


In [10]:
# Memory analysis
import sys
import json

def get_memory_usage(key):
    """Return the memory usage Redis reports for `key` (MEMORY USAGE command)."""
    return r.memory_usage(key)

# Store same data in different structures
user_data = {
    'id': '1001',
    'name': 'Alice Johnson',
    'email': 'alice@example.com',
    'age': '30'
}

# As string (JSON): the whole object serialised into one value
r.set('mem:user:string', json.dumps(user_data))

# As hash: one field per attribute
r.hset('mem:user:hash', mapping=user_data)

# Compare memory
string_mem = get_memory_usage('mem:user:string')
hash_mem = get_memory_usage('mem:user:hash')

print('üíæ Memory Usage Comparison:')
print(f'   String (JSON): {string_mem} bytes')
print(f'   Hash: {hash_mem} bytes')
print(f'   Difference: {abs(string_mem - hash_mem)} bytes')

# Report whichever encoding came out smaller instead of printing a blank line
# when the hash is not the winner
if hash_mem < string_mem:
    print(f'   Hash is {(string_mem / hash_mem):.2f}x more memory efficient!')
elif string_mem < hash_mem:
    print(f'   String (JSON) is {(hash_mem / string_mem):.2f}x more memory efficient!')
else:
    print('   Both encodings use the same amount of memory')


üíæ Memory Usage Comparison:
   String (JSON): 168 bytes
   Hash: 136 bytes
   Difference: 32 bytes
   Hash is 1.24x more memory efficient!


---

## Part 6: Real-World Example - Shopping Cart

Let's build an optimized shopping cart:


In [11]:
class ShoppingCart:
    """Shopping cart stored as a single Redis hash.

    The hash lives under the key ``cart:<user_id>``; each field is a product
    id and its value is the quantity of that product.

    Args:
        redis_client: Client object providing ``pipeline``, ``hdel``,
            ``hgetall``, ``hvals`` and ``delete`` (e.g. ``redis.Redis``).
        user_id: Identifier used to build the cart key.
        ttl_seconds: Expiry applied to the cart every time an item is added.
            Defaults to 24 hours.
    """

    DEFAULT_TTL_SECONDS = 86400  # 24 hours

    def __init__(self, redis_client, user_id, ttl_seconds=DEFAULT_TTL_SECONDS):
        self.r = redis_client
        self.user_id = user_id
        self.key = f'cart:{user_id}'
        self.ttl_seconds = ttl_seconds

    def add_item(self, product_id, quantity=1):
        """Increase the quantity of ``product_id`` by ``quantity`` and refresh the TTL."""
        # Queue both commands on one pipeline so the increment and the TTL
        # refresh are sent together; a cart can no longer be left without an
        # expiry if the client fails between two separate calls.
        with self.r.pipeline() as pipe:
            pipe.hincrby(self.key, product_id, quantity)
            pipe.expire(self.key, self.ttl_seconds)
            pipe.execute()

    def remove_item(self, product_id):
        """Remove ``product_id`` from the cart entirely, whatever its quantity."""
        self.r.hdel(self.key, product_id)

    def get_items(self):
        """Return the cart contents as returned by HGETALL (product id -> quantity)."""
        return self.r.hgetall(self.key)

    def get_item_count(self):
        """Return the sum of all quantities in the cart (0 for an empty cart)."""
        # Only the quantities are needed, so fetch values without field names
        return sum(int(qty) for qty in self.r.hvals(self.key))

    def clear(self):
        """Delete the whole cart."""
        self.r.delete(self.key)

# Test shopping cart
cart = ShoppingCart(r, 'user:1001')

# Start from an empty cart: add_item increments existing quantities, so
# re-running this cell would otherwise double every count
cart.clear()

cart.add_item('P001', 2)
cart.add_item('P002', 1)
cart.add_item('P003', 3)

print('üõí Shopping Cart:')
print(f'   Items: {cart.get_items()}')
print(f'   Total quantity: {cart.get_item_count()}')


üõí Shopping Cart:
   Items: {'P001': '2', 'P002': '1', 'P003': '3'}
   Total quantity: 6


## Cleanup


In [12]:
# Clean up test data
keys = r.keys('*')
deleted = len(keys)
for key in keys:
    r.delete(key)

print(f'‚úÖ Redis data cleaned: {deleted} keys deleted')

# Stop and remove Docker container
!docker stop workshop-redis-module6
!docker rm workshop-redis-module6

print('‚úÖ Docker container removed')
print('‚úÖ Cleanup complete')

‚úÖ Redis data cleaned: 15 keys deleted
workshop-redis-module6
workshop-redis-module6
‚úÖ Docker container removed
‚úÖ Cleanup complete


---

## 🎯 Key Takeaways

### ✅ Data Structure Selection

- **Strings**: Simple values, counters, cache
- **Hashes**: Objects with multiple fields (most memory efficient)
- **Lists**: Ordered collections, queues, feeds
- **Sets**: Unique items, relationships
- **Sorted Sets**: Rankings, leaderboards, time-series

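### ⚡ Performance Insights

- All operations are sub-millisecond
- Single-key commands cost about the same on every data structure (~0.06 ms median in the run above); the network round trip dominates, so no type is inherently "fastest"
- Sorted sets do extra work to keep members ordered, but the overhead was negligible at this scale
- Hashes used less memory than the equivalent JSON string for the small object in Part 5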

### 🔧 Optimization Tips

1. **Use hashes for objects** (vs JSON strings)
2. **Set TTLs** to auto-expire data
3. **Use pipelining** for bulk operations
4. **Choose appropriate data structures** (avoid misuse)
5. **Monitor memory usage** with `INFO memory`

---

## 🎉 Well Done!

You now understand Redis data structures and can optimize for production!


---

## 🧹 Cleanup

Let's clean up our Docker container and Redis data:

In [None]:
# Cleanup (if not already done)
try:
    # Try to flush if Redis is still running
    r.flushdb()
    print('‚úÖ Redis database flushed')
except:
    print('‚ÑπÔ∏è  Redis already cleaned up')

# Stop and remove Docker container (if still running)
!docker stop workshop-redis-module6 2>/dev/null || true
!docker rm workshop-redis-module6 2>/dev/null || true

print('‚úÖ Docker container stopped and removed')
print('üéâ Lab complete!')