# 002: Python Advanced Concepts

## 1. Decorators

Decorators are a powerful Python feature that allows you to modify or enhance functions and classes without changing their source code.

### 1.1 Function Decorators

In [None]:
# Basic decorator example
import time
from functools import wraps

def timing_decorator(func):
    """Wrap *func* so that every successful call prints how long it took.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        *func*, prints ``"<name> took <seconds> seconds"`` and returns
        *func*'s result unchanged. If *func* raises, the exception
        propagates and nothing is printed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high resolution; time.time() can
        # jump when the system clock is adjusted, which skews an interval.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{func.__name__} took {elapsed:.4f} seconds")
        return result
    return wrapper

@timing_decorator
def slow_function(n):
    """Return the sum of the squares of 0..n-1 (a stand-in for slow work)."""
    return sum(i ** 2 for i in range(n))

# Test the decorator: the call first prints the timing line emitted by the
# wrapper, then the returned sum is printed below.
result = slow_function(1000000)
print(f"Result: {result}")

### 1.2 Decorators with Arguments

In [None]:
def repeat(times):
    """Build a decorator that calls the wrapped function ``times`` times.

    The decorated function returns a list holding the result of each call,
    in call order. ``range(times)`` drives the loop, so zero or a negative
    count yields an empty list.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            return [func(*args, **kwargs) for _ in range(times)]
        return wrapper
    return decorator

@repeat(times=3)
def greet(name):
    """Return a greeting for ``name``."""
    return f"Hello, {name}!"

# greet() now runs 3 times per call; `messages` is the list of the three
# return values, not a single string.
messages = greet("Data Scientist")
print(messages)

### 1.3 Class Decorators

In [None]:
def singleton(cls):
    """Turn ``cls`` into a factory that always hands back one shared instance.

    The first call constructs the instance with the arguments it receives;
    every later call returns that same object and ignores its arguments.
    """
    created = {}

    @wraps(cls)
    def get_instance(*args, **kwargs):
        if cls in created:
            return created[cls]
        obj = cls(*args, **kwargs)
        created[cls] = obj
        return obj

    return get_instance

@singleton
class DatabaseConnection:
    """Stand-in connection object that records a host and port."""

    def __init__(self, host, port):
        self.host = host
        self.port = port
        # Printed only when an instance is really constructed, which makes
        # the singleton behaviour visible in the output.
        print(f"Creating connection to {host}:{port}")

# Both will return the same instance: the second call reuses the object
# built by the first, so "Creating connection..." is printed only once.
db1 = DatabaseConnection("localhost", 5432)
db2 = DatabaseConnection("localhost", 5432)
print(f"Same instance? {db1 is db2}")  # True

### 1.4 Real-World Application: Memoization for ML Models

In [None]:
from functools import lru_cache
import numpy as np

class MLModelCache:
    """Memoise costly model predictions keyed by their feature values."""

    @staticmethod
    @lru_cache(maxsize=128)
    def expensive_prediction(feature_tuple):
        """Return the sum of squared features after a simulated 0.1 s delay.

        ``feature_tuple`` must be hashable because ``lru_cache`` uses it as
        the cache key.
        """
        vector = np.array(feature_tuple)
        time.sleep(0.1)  # stand-in for real inference latency
        squared = vector ** 2
        return np.sum(squared)

    @classmethod
    def predict(cls, features):
        """Freeze ``features`` into a tuple and delegate to the cached call."""
        cache_key = tuple(features)
        return cls.expensive_prediction(cache_key)

# Test caching performance
# predict() is a classmethod, so calling it through an instance works too.
model = MLModelCache()

# First call - slow: cache miss, pays the simulated 0.1 s delay
start = time.time()
result1 = model.predict([1, 2, 3, 4, 5])
print(f"First call: {time.time() - start:.4f}s, Result: {result1}")

# Second call with same input - fast (cached): the list becomes the same
# tuple key, so the stored result is returned without the delay
start = time.time()
result2 = model.predict([1, 2, 3, 4, 5])
print(f"Cached call: {time.time() - start:.4f}s, Result: {result2}")

---

## 2. Generators and Iterators

Generators are memory-efficient tools for handling large datasets - crucial for big data processing.

### 2.1 Generator Functions

In [None]:
def data_stream_generator(n):
    """Lazily produce ``n`` synthetic records, one per iteration.

    Each record is a dict with ``id`` (0..n-1), ``value`` (``id`` squared)
    and ``timestamp`` (``time.time()`` at the moment the record is built).
    Only one record exists at a time, so memory use does not grow with ``n``.
    """
    yield from (
        {"id": i, "value": i ** 2, "timestamp": time.time()}
        for i in range(n)
    )

# Process large dataset without loading all into memory: each record is
# consumed and discarded before the generator builds the next one.
print("Processing stream of 1 million records...")
total = 0
for record in data_stream_generator(1_000_000):
    total += record["value"]
    # Progress report every 200,000 records (ids 0, 200000, 400000, ...)
    if record["id"] % 200_000 == 0:
        print(f"Processed {record['id']} records...")

print(f"Total sum: {total}")

### 2.2 Generator Expressions

In [None]:
# Memory comparison: List vs Generator
import sys

# List comprehension - loads all in memory
# NOTE: sys.getsizeof is shallow - it reports the list object itself, not
# the integers it references.
list_comp = [x ** 2 for x in range(10000)]
print(f"List size: {sys.getsizeof(list_comp)} bytes")

# Generator expression - evaluates lazily, so its size does not depend on
# how many items it will eventually produce
gen_exp = (x ** 2 for x in range(10000))
print(f"Generator size: {sys.getsizeof(gen_exp)} bytes")

# Both produce same results
print(f"Sum from list: {sum([x ** 2 for x in range(10000)])}")
print(f"Sum from generator: {sum((x ** 2 for x in range(10000)))}")

### 2.3 Custom Iterator Class

In [None]:
class STDFFileReader:
    """Iterator that serves simulated STDF test records in fixed-size batches.

    No file is actually opened: ``filename`` is only stored, and the record
    count is a hard-coded simulation value.
    """

    def __init__(self, filename, batch_size=1000):
        self.filename = filename
        self.batch_size = batch_size
        self.current_batch = 0
        self.total_records = 10000  # Simulated

    def __iter__(self):
        # Rewind so the same reader can be looped over more than once.
        self.current_batch = 0
        return self

    def __next__(self):
        """Return the next batch (a list of record dicts) or stop."""
        first = self.current_batch * self.batch_size
        if first >= self.total_records:
            raise StopIteration

        # The final batch may be shorter than batch_size.
        last = min(first + self.batch_size, self.total_records)
        batch_data = []
        for test_id in range(first, last):
            # Every tenth record is a simulated failure.
            outcome = "FAIL" if test_id % 10 == 0 else "PASS"
            batch_data.append({"test_id": test_id, "result": outcome})

        self.current_batch += 1
        return batch_data

# Use the iterator: 10,000 simulated records in batches of 2,000;
# enumerate(..., 1) numbers the batches from 1 for display.
reader = STDFFileReader("test_data.stdf", batch_size=2000)
for batch_num, batch in enumerate(reader, 1):
    fail_count = sum(1 for record in batch if record["result"] == "FAIL")
    print(f"Batch {batch_num}: {len(batch)} records, {fail_count} failures")

---

## 3. Context Managers

Context managers ensure proper resource management - critical for file operations, database connections, and locks.

### 3.1 Using Context Managers

In [None]:
# Traditional file handling (risky)
def read_file_risky(filename):
    """Return the text of ``filename`` WITHOUT exception-safe cleanup.

    Kept deliberately unsafe as a counter-example: if ``read()`` raises,
    ``close()`` is never reached and the file handle leaks.
    """
    handle = open(filename, 'r')
    contents = handle.read()
    handle.close()  # skipped entirely when the line above raises
    return contents

# Safe file handling with context manager
def read_file_safe(filename):
    """Return the text of ``filename``, closing the file in every case.

    The ``with`` block closes the handle when it exits, whether ``read()``
    succeeds or raises.
    """
    with open(filename, 'r') as handle:
        return handle.read()

# Multiple context managers: both files are closed when the block exits.
# NOTE(review): 'input.txt' must already exist in the working directory,
# otherwise open() raises FileNotFoundError; 'output.txt' is created or
# overwritten.
with open('input.txt', 'r') as infile, open('output.txt', 'w') as outfile:
    for line in infile:
        outfile.write(line.upper())

### 3.2 Creating Custom Context Managers (Class-based)

In [None]:
class Timer:
    """Context manager that reports how long its ``with`` block ran.

    After the block exits, ``start_time``, ``end_time`` and ``elapsed``
    (seconds) are available on the instance.
    """

    def __init__(self, name="Code block"):
        self.name = name

    def __enter__(self):
        """Record the start time, announce the block and return ``self``."""
        self.start_time = time.time()
        print(f"Starting: {self.name}")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Record the end time and print the elapsed duration."""
        finished_at = time.time()
        self.end_time = finished_at
        self.elapsed = finished_at - self.start_time
        print(f"Finished: {self.name} in {self.elapsed:.4f}s")

        # A falsy return lets any exception from the block propagate.
        return False

# Use the timer: "Starting: ..." is printed on entry and "Finished: ..."
# on exit, around whatever the block itself prints.
with Timer("Data processing"):
    data = [i ** 2 for i in range(1000000)]
    result = sum(data)
    print(f"Processed {len(data)} items")

### 3.3 Creating Context Managers with contextlib

In [None]:
from contextlib import contextmanager

@contextmanager
def database_transaction(connection):
    """Manage a database transaction with automatic rollback on error.

    Yields ``connection`` to the ``with`` body. The commit/rollback/close
    calls are simulated: only the corresponding messages are printed.

    Raises:
        Exception: whatever the ``with`` body raised, re-raised after the
            rollback message.
    """
    print("Beginning transaction...")
    try:
        yield connection
        # Reached only when the with-body finished without raising. The
        # commit sits inside the try so that a failing commit would also be
        # handled by the rollback branch below.
        print("Committing transaction...")
        # connection.commit()  # Simulated
    except Exception as e:
        print(f"Error occurred: {e}")
        print("Rolling back transaction...")
        # connection.rollback()  # Simulated
        raise  # re-raise so the caller still sees the original error
    finally:
        # Runs on both the success and the failure path.
        print("Closing connection...")
        # connection.close()  # Simulated

# Simulate usage
class FakeConnection:
    """Empty placeholder passed through the transaction context manager."""
    pass

conn = FakeConnection()
# `db` is the same object as `conn`: the context manager yields it unchanged.
with database_transaction(conn) as db:
    print("Executing queries...")
    # Perform database operations

### 3.4 Real-World: Model Training Context Manager

In [None]:
@contextmanager
def ml_training_session(model_name, log_dir="./logs"):
    """Bracket a training run with setup, failure reporting and a summary.

    Creates ``log_dir`` if it is missing, then yields a dict with the keys
    ``model``, ``start_time`` and ``log_dir``. An exception raised by the
    ``with`` body is reported and re-raised; the duration summary is printed
    in either case.
    """
    import os

    # Setup
    print(f"Starting training session: {model_name}")
    started_at = time.time()
    os.makedirs(log_dir, exist_ok=True)

    session_info = dict(
        model=model_name,
        start_time=started_at,
        log_dir=log_dir,
    )

    try:
        yield session_info
    except Exception as e:
        print(f"Training failed: {e}")
        raise
    finally:
        # Summary is printed on success and on failure alike.
        elapsed = time.time() - started_at
        print(f"Training session completed in {elapsed:.2f}s")
        print(f"Logs saved to {log_dir}")

# Use in training: the default log_dir applies, so a ./logs directory is
# created in the working directory if it does not exist yet.
with ml_training_session("RandomForest_v1") as session:
    print(f"Training {session['model']}...")
    time.sleep(0.5)  # Simulate training
    print("Epoch 1 complete")
    print("Epoch 2 complete")

---

## 4. Magic Methods and Operator Overloading

Magic methods (dunder methods) allow you to define how objects behave with Python operators.

### 4.1 Common Magic Methods

In [None]:
class Vector:
    """2D vector with arithmetic, comparison, length and index operators."""

    # Instances are mutable and define __eq__, so they are unhashable.
    # Python already sets this implicitly; stating it makes it visible.
    __hash__ = None

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        """String representation for developers."""
        return f"Vector({self.x}, {self.y})"

    def __str__(self):
        """String representation for users."""
        return f"<{self.x}, {self.y}>"

    def __add__(self, other):
        """Vector addition: v1 + v2 (``other`` needs ``x`` and ``y``)."""
        return Vector(self.x + other.x, self.y + other.y)

    def __sub__(self, other):
        """Vector subtraction: v1 - v2 (``other`` needs ``x`` and ``y``)."""
        return Vector(self.x - other.x, self.y - other.y)

    def __mul__(self, scalar):
        """Scalar multiplication: v * scalar."""
        return Vector(self.x * scalar, self.y * scalar)

    def __rmul__(self, scalar):
        """Reflected scalar multiplication: scalar * v."""
        return self * scalar

    def __eq__(self, other):
        """Equality comparison: v1 == v2.

        Operands without ``x``/``y`` attributes yield ``NotImplemented`` so
        that ``v == 5`` evaluates to ``False`` instead of raising
        ``AttributeError``.
        """
        try:
            return self.x == other.x and self.y == other.y
        except AttributeError:
            return NotImplemented

    def __len__(self):
        """Magnitude of the vector, truncated to an int.

        ``len()`` must return an integer, so the fractional part is lost.
        """
        return int((self.x ** 2 + self.y ** 2) ** 0.5)

    def __getitem__(self, index):
        """Access by index: v[0] is x, v[1] is y.

        Raises:
            IndexError: for any other index.
        """
        if index == 0:
            return self.x
        elif index == 1:
            return self.y
        else:
            raise IndexError("Vector index out of range")

# Test the vector class
v1 = Vector(3, 4)
v2 = Vector(1, 2)

# f-strings format with __str__, so vectors print as <x, y> here.
print(f"v1: {v1}")
print(f"v2: {v2}")
print(f"v1 + v2: {v1 + v2}")
print(f"v1 - v2: {v1 - v2}")
print(f"v1 * 3: {v1 * 3}")
print(f"v1 == v2: {v1 == v2}")
print(f"Length of v1: {len(v1)}")
print(f"v1[0]: {v1[0]}, v1[1]: {v1[1]}")

### 4.2 Real-World: Custom ML Model Container

In [None]:
class ModelEnsemble:
    """Ensemble of callable models that averages their predictions."""

    def __init__(self, models=None):
        """Create an ensemble from an optional iterable of callables.

        The models are copied into a new list so that later changes to the
        caller's own list cannot silently alter the ensemble.
        """
        self.models = list(models) if models is not None else []

    def __add__(self, model):
        """Add model to ensemble: ensemble + model.

        Returns a new ensemble; the left operand is left untouched.
        """
        return ModelEnsemble([*self.models, model])

    def __len__(self):
        """Number of models in ensemble."""
        return len(self.models)

    def __getitem__(self, index):
        """Access model by index (delegates to the underlying list)."""
        return self.models[index]

    def __iter__(self):
        """Iterate over models."""
        return iter(self.models)

    def __call__(self, X):
        """Make ensemble callable: ensemble(X) returns the mean prediction.

        Raises:
            ValueError: if the ensemble contains no models, since an
                average over zero predictions is undefined.
        """
        if not self.models:
            raise ValueError("Cannot predict with an empty ensemble")
        predictions = [model(X) for model in self.models]
        # Average predictions
        return sum(predictions) / len(predictions)

    def __repr__(self):
        return f"ModelEnsemble(models={len(self.models)})"

# Simulate simple models
class SimpleModel:
    """Toy model whose prediction is the input plus a fixed bias."""

    def __init__(self, name, bias):
        self.name = name
        self.bias = bias
    
    def __call__(self, X):
        """Return ``X + bias``."""
        return X + self.bias
    
    def __repr__(self):
        return f"Model({self.name})"

# Build ensemble using operator overloading: each `+` returns a new
# ensemble, so the name is rebound on every step.
ensemble = ModelEnsemble()
ensemble = ensemble + SimpleModel("M1", 1.0)
ensemble = ensemble + SimpleModel("M2", 2.0)
ensemble = ensemble + SimpleModel("M3", 1.5)

print(f"Ensemble: {ensemble}")
print(f"Number of models: {len(ensemble)}")
print(f"First model: {ensemble[0]}")
# Mean of 11.0, 12.0 and 11.5
print(f"Prediction for X=10: {ensemble(10)}")

---

## 5. Meta-Programming

Meta-programming allows you to write code that manipulates code - advanced but powerful.

### 5.1 Dynamic Class Creation

In [None]:
def create_model_class(name, features):
    """Build and return a new class called ``name`` at runtime.

    Instances accept the listed ``features`` as keyword arguments and store
    each one as an attribute. The class also exposes ``features`` itself as
    a class attribute.
    """

    def __init__(self, **kwargs):
        # Features missing from kwargs default to None; keyword arguments
        # that are not listed features are ignored.
        for attr in features:
            setattr(self, attr, kwargs.get(attr))

    def __repr__(self):
        rendered = [f"{k}={getattr(self, k)}" for k in features]
        attrs = ", ".join(rendered)
        return f"{name}({attrs})"

    # type(name, bases, namespace) is the three-argument class constructor.
    namespace = {
        '__init__': __init__,
        '__repr__': __repr__,
        'features': features,
    }
    return type(name, (), namespace)

# Create different model classes dynamically
LinearModel = create_model_class('LinearModel', ['coef', 'intercept'])
TreeModel = create_model_class('TreeModel', ['max_depth', 'n_estimators'])

# Use the dynamically created classes: each keyword becomes an attribute
lr = LinearModel(coef=[1.5, 2.3], intercept=0.5)
rf = TreeModel(max_depth=10, n_estimators=100)

print(lr)
print(rf)
print(f"LinearModel features: {LinearModel.features}")

### 5.2 Metaclasses

In [None]:
class ValidationMeta(type):
    """Metaclass that wraps every public method with validation logging.

    Only plain functions whose name does not start with an underscore are
    wrapped. Static/class methods, nested classes and other callables in
    the class body are left alone, because the wrapper assumes an instance
    as its first argument.
    """

    def __new__(cls, name, bases, attrs):
        # Local import keeps this block usable without touching the
        # notebook's import cells.
        from types import FunctionType

        # Work on a copy instead of reassigning keys of ``attrs`` while
        # iterating over it.
        namespace = dict(attrs)
        for key, value in attrs.items():
            if isinstance(value, FunctionType) and not key.startswith('_'):
                namespace[key] = cls.validate_wrapper(value)

        return super().__new__(cls, name, bases, namespace)

    @staticmethod
    def validate_wrapper(func):
        """Return ``func`` wrapped with before/after validation messages."""
        # wraps() keeps the method's __name__ and docstring intact for
        # help(), debugging and introspection.
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            print(f"Validating call to {func.__name__}...")
            result = func(self, *args, **kwargs)
            print(f"Validation complete for {func.__name__}")
            return result
        return wrapper

class DataProcessor(metaclass=ValidationMeta):
    """Example class whose public methods are wrapped by ValidationMeta."""
    
    def clean_data(self, data):
        """Return ``data`` without its ``None`` entries."""
        return [x for x in data if x is not None]
    
    def transform_data(self, data):
        """Return a list with every element of ``data`` multiplied by 2."""
        return [x * 2 for x in data]

# Test the metaclass: each method call is bracketed by the
# "Validating call..." / "Validation complete..." messages.
processor = DataProcessor()
clean = processor.clean_data([1, 2, None, 3])
transformed = processor.transform_data(clean)
print(f"Result: {transformed}")

---

## 6. Project: Advanced Python for STDF Analysis

Let's apply all advanced concepts to build a sophisticated STDF data processor.

### 📝 Implementation

**Purpose:** Core implementation with detailed code

**Key implementation details below.**

In [None]:
from contextlib import contextmanager
from functools import lru_cache, wraps
import time
from typing import Iterator, Dict, Any
class STDFAnalyzer:
    """Simulated STDF file analyzer built from the notebook's techniques.

    Combines a timing decorator, a batch generator, a session context
    manager and a bounded per-instance statistics cache. No file is read:
    all records are synthesised.
    """

    # NOTE: the notebook originally split this class across two cells
    # ("Implementation" / "Implementation Part 2") in the middle of
    # compute_test_statistics. A class body cannot span cells, so the whole
    # definition now lives here in one piece.

    def __init__(self, cache_size=1000):
        """Create an analyzer.

        Args:
            cache_size: Maximum number of statistics results to keep cached.
        """
        self._cache_size = cache_size
        # 'reads' counts batches produced by read_stdf_records();
        # 'cache_hits' counts statistics lookups answered from the cache.
        self._metrics = {'reads': 0, 'cache_hits': 0}
        # The cache is created per instance. Decorating the method itself
        # with lru_cache would store `self` in a class-level cache (keeping
        # every analyzer alive) and would ignore cache_size.
        self._cached_statistics = lru_cache(maxsize=cache_size)(
            self._compute_test_statistics
        )

    # Decorator for timing. Defined as a plain function first so it can be
    # applied inside the class body on every Python version; it is turned
    # into a staticmethod at the end of the class.
    def timed(func):
        """Decorator that prints ``"<name>: <seconds>s"`` after each call."""
        @wraps(func)
        def wrapper(*args, **kwargs):
            # perf_counter() is monotonic, unlike the wall clock.
            start = time.perf_counter()
            result = func(*args, **kwargs)
            duration = time.perf_counter() - start
            print(f"{func.__name__}: {duration:.4f}s")
            return result
        return wrapper

    # Generator for memory-efficient reading
    def read_stdf_records(self, filename: str, batch_size: int = 1000) -> Iterator[Dict[str, Any]]:
        """Yield simulated STDF records in batches.

        Args:
            filename: Accepted for API symmetry; not opened by the simulation.
            batch_size: Number of records per batch (the last may be smaller).

        Yields:
            Dicts with ``batch_id`` and ``records`` (a list of record dicts
            with ``test_id``, ``voltage``, ``current`` and ``result``).
        """
        total_records = 50000  # Simulated

        for batch_start in range(0, total_records, batch_size):
            batch_end = min(batch_start + batch_size, total_records)
            batch = {
                'batch_id': batch_start // batch_size,
                'records': [
                    {
                        'test_id': i,
                        'voltage': 3.3 + (i % 10) * 0.01,
                        'current': 0.5 + (i % 5) * 0.02,
                        'result': 'PASS' if i % 15 != 0 else 'FAIL'
                    }
                    for i in range(batch_start, batch_end)
                ]
            }
            # Count before yielding so the batch is recorded even if the
            # consumer stops iterating right after receiving it.
            self._metrics['reads'] += 1
            yield batch

    # Context manager for analysis session
    @contextmanager
    def analysis_session(self, filename: str):
        """Context manager that prints a summary when the session ends.

        Yields a dict with ``filename``, ``start_time`` and
        ``records_processed``; the caller updates ``records_processed``.
        The summary is printed even if the ``with`` body raises.
        """
        print(f"Opening STDF file: {filename}")
        session_data = {
            'filename': filename,
            'start_time': time.time(),
            'records_processed': 0
        }

        try:
            yield session_data
        finally:
            duration = time.time() - session_data['start_time']
            print("Analysis complete:")
            print(f"  - File: {filename}")
            print(f"  - Records: {session_data['records_processed']}")
            print(f"  - Duration: {duration:.2f}s")
            print(f"  - Cache hits: {self._metrics['cache_hits']}/{self._metrics['reads']}")

    # Cached computation
    def compute_test_statistics(self, test_id: int, n_samples: int) -> Dict[str, float]:
        """Compute statistics for a test (cached for performance).

        Returns:
            A dict with ``mean``, ``std`` and ``n_samples``. A fresh copy is
            returned on every call so callers cannot corrupt the cached
            entry by mutating the result.
        """
        stats = self._cached_statistics(test_id, n_samples)
        # Mirror the real hit counter rather than guessing at call sites.
        self._metrics['cache_hits'] = self._cached_statistics.cache_info().hits
        return dict(stats)

    def _compute_test_statistics(self, test_id, n_samples):
        """Uncached worker behind compute_test_statistics()."""
        # Simulate expensive computation
        time.sleep(0.01)
        return {
            'mean': test_id * 1.5,
            'std': test_id * 0.1,
            'n_samples': n_samples
        }

    @timed
    def analyze_file(self, filename: str) -> Dict[str, Any]:
        """Run the full simulated analysis and return aggregate results.

        Returns:
            A dict with ``total_records``, ``pass_count``, ``fail_count``,
            ``test_stats`` (statistics keyed by grouped test id) and
            ``yield_percent``.
        """
        with self.analysis_session(filename) as session:
            results = {
                'total_records': 0,
                'pass_count': 0,
                'fail_count': 0,
                'test_stats': {}
            }

            # Process in batches using generator
            for batch in self.read_stdf_records(filename):
                for record in batch['records']:
                    results['total_records'] += 1

                    if record['result'] == 'PASS':
                        results['pass_count'] += 1
                    else:
                        results['fail_count'] += 1

                    # Statistics are needed once per test group; the cache
                    # pays off when the same analyzer handles further files.
                    test_id = record['test_id'] % 10  # Group tests
                    if test_id not in results['test_stats']:
                        results['test_stats'][test_id] = self.compute_test_statistics(
                            test_id, 100
                        )

            session['records_processed'] = results['total_records']

            # Calculate yield; guard against an empty input
            total = results['total_records']
            results['yield_percent'] = (
                results['pass_count'] / total * 100 if total else 0.0
            )

            return results

    # Expose the decorator as STDFAnalyzer.timed for external use.
    timed = staticmethod(timed)
# Demonstrate the advanced STDF analyzer
print("="*60)
print("Advanced STDF Analyzer Demo")
print("="*60)
analyzer = STDFAnalyzer()
# analyze_file() prints the session summary and its own timing line before
# returning the aggregated results dict.
results = analyzer.analyze_file("production_data.stdf")
print("\n" + "="*60)
print("Analysis Results:")
print("="*60)
print(f"Total Records: {results['total_records']}")
print(f"Pass: {results['pass_count']}")
print(f"Fail: {results['fail_count']}")
print(f"Yield: {results['yield_percent']:.2f}%")
print(f"Unique Tests: {len(results['test_stats'])}")


---

## 7. Key Takeaways

### When to Use Each Feature:

1. **Decorators**
   - ✅ Cross-cutting concerns (logging, timing, caching)
   - ✅ Access control and authentication
   - ✅ Input validation

2. **Generators**
   - ✅ Large datasets that don't fit in memory
   - ✅ Streaming data processing
   - ✅ Infinite sequences

3. **Context Managers**
   - ✅ Resource management (files, connections)
   - ✅ Transaction handling
   - ✅ Setup/teardown operations

4. **Magic Methods**
   - ✅ Custom data structures
   - ✅ Making objects behave like built-in types
   - ✅ Operator overloading

5. **Meta-programming**
   - ✅ Frameworks and libraries
   - ✅ Dynamic code generation
   - ⚠️ Use sparingly - can reduce code readability

---

## 8. Practice Exercises

### Exercise 1: Create a retry decorator
Write a decorator that retries a function up to N times if it raises an exception.

### Exercise 2: Build a data pipeline generator
Create a generator that simulates an ETL pipeline processing records one at a time.

### Exercise 3: Custom context manager for logging
Build a context manager that logs all function calls within its scope.

### Exercise 4: Matrix class with operators
Implement a Matrix class with +, -, *, and @ (matrix multiplication) operators.

---

## Next Steps

- ✅ Complete exercises above
- → Continue to **003_Python_Concurrency_Parallelism.ipynb**
- → Advanced topic: **044_Model_Interpretability_SHAP_LIME.ipynb** (uses decorators)
- → Application: **092_Apache_Spark_PySpark.ipynb** (uses generators extensively)

---

**Notebook Complete! 🚀**