In [1]:
# SOLID Principles
# Design Patterns
# Dependency Injection
# Object Composition
# Data Transfer Objects (DTOs)
# Repository Pattern
# Factory Pattern

In [2]:
# SOLID Principles

In [3]:
# Single Responsibility Principle (SRP):

# Each class should have only one reason to change, meaning it should have only one job or responsibility.

In [3]:
# Benefits of SRP in Data Engineering

# Maintainability: Code is easier to maintain and modify when each class or function has a single responsibility.

# Testability: Unit testing becomes simpler when each component has a clear, focused role.

# Scalability: Itâ€™s easier to extend functionality without affecting other parts of the system.

# Collaboration: Clear separation of responsibilities allows multiple team members to work on different parts of the codebase simultaneously.

In [74]:
# Example Valid SRP Cases:

# Separate classes for loading and processing data.
# Separate classes for different database operations.
# Separate classes for different stages of a data pipeline.

# Example Invalid SRP Cases:

# Combining data loading and processing in one class.
# Combining multiple database operations in one class.
# Combining different stages of a data pipeline in one class.

In [5]:
class DataLoader:
    def load_data(self, source):
        # Code to load data from a source
        pass 

class DataProcessor:
    def process_data(self, data):
        # Code to process the loaded data
        pass

In [55]:
# The Single Responsibility Principle (SRP) states that a class should have only one reason to change, meaning it should have only one job or responsibility. 
# This principle helps in making classes more modular, easier to maintain, and less prone to bugs.

# Valid Cases
# Case 1: Loading and Processing Data

In [57]:
# Valid: Separate classes for loading and processing data.

# Here, DataLoader has the responsibility of loading data, and DataProcessor has the responsibility of processing data.
# They each have a single reason to change: DataLoader might change if the loading logic changes, and DataProcessor might change if the processing logic changes.

class DataLoader:
    def load_data(self, source):
        # Code to load data from a source
        print(f"Loading data from {source}")

class DataProcessor:
    def process_data(self, data): # This is not composition since object is not stored as a class attribute. 
        # Code to process the loaded data
        print(f"Processing data: {data}")

# Usage
data_loader = DataLoader()
data_processor = DataProcessor()

data = data_loader.load_data("database")

data_processor.process_data(data)


Loading data from database
Processing data: None


In [59]:
# ase 2: Separate Concerns for Database Operations

# Valid: Separate classes for different database operations.

In [61]:
# Here, UserRepository is responsible for retrieving users, while UserService is responsible for updating users. They each have a single responsibility.

class UserRepository:
    def get_user(self, user_id):
        # Code to retrieve user from database
        print(f"Getting user with ID {user_id}")

class UserService:
    def update_user(self, user_id, new_data):
        # Code to update user in the database
        print(f"Updating user with ID {user_id} with data {new_data}")

# Usage
user_repo = UserRepository()
user_service = UserService()

user_repo.get_user(1)
user_service.update_user(1, {"name": "Alice"})


Getting user with ID 1
Updating user with ID 1 with data {'name': 'Alice'}


In [62]:
# Invalid Cases
# Case 1: Combining Data Loading and Processing

# Invalid: A class that handles both loading and processing data.

In [65]:
# Here, DataHandler has multiple responsibilities: loading and processing data. 

class DataHandler: # It violates the Single Responsibility Principle (SRP) because it has two reasons to change: if the loading logic changes or if the processing logic changes.
    def load_data(self, source):
        # Code to load data from a source
        print(f"Loading data from {source}")

    def process_data(self, data):
        # Code to process the loaded data
        print(f"Processing data: {data}")

# Usage
data_handler = DataHandler()
data = data_handler.load_data("database")
data_handler.process_data(data)


Loading data from database
Processing data: None


In [66]:
# Combining Multiple Database Operations

# Invalid: A class that handles multiple types of database operations.

In [68]:
# Here, UserDatabaseHandler has multiple responsibilities: retrieving, updating, and deleting users. This violates SRP because changes in 
# any of these operations would require changes to the same class.

class UserDatabaseHandler:
    def get_user(self, user_id):
        # Code to retrieve user from database
        print(f"Getting user with ID {user_id}")

    def update_user(self, user_id, new_data):
        # Code to update user in the database
        print(f"Updating user with ID {user_id} with data {new_data}")

    def delete_user(self, user_id):
        # Code to delete user from database
        print(f"Deleting user with ID {user_id}")

# Usage
user_db_handler = UserDatabaseHandler()
user_db_handler.get_user(1)
user_db_handler.update_user(1, {"name": "Alice"})
user_db_handler.delete_user(1)


Getting user with ID 1
Updating user with ID 1 with data {'name': 'Alice'}
Deleting user with ID 1


In [69]:
# Case: Managing a Data Pipeline

# Invalid: Combining different stages of a data pipeline into one class.

In [70]:
class DataPipeline:
    def extract_data(self, source):
        # Code to extract data
        print(f"Extracting data from {source}")

    def transform_data(self, data):
        # Code to transform data
        print(f"Transforming data: {data}")

    def load_data(self, data, destination):
        # Code to load data
        print(f"Loading data to {destination}")

# Usage
pipeline = DataPipeline()
data = pipeline.extract_data("source")
transformed_data = pipeline.transform_data(data)
pipeline.load_data(transformed_data, "destination")


Extracting data from source
Transforming data: None
Loading data to destination


In [71]:
# Here, DataPipeline has multiple responsibilities: extracting, transforming, and loading data. This violates SRP because changes to any of these stages
# would require changes to the same class.

In [72]:
# Valid: Separating different stages of a data pipeline into different classes.

class DataExtractor:
    def extract_data(self, source):
        # Code to extract data
        print(f"Extracting data from {source}")
        return f"data from {source}"

class DataTransformer:
    def transform_data(self, data):
        # Code to transform data
        print(f"Transforming data: {data}")
        return f"transformed {data}"

class DataLoader:
    def load_data(self, data, destination):
        # Code to load data
        print(f"Loading data to {destination}")

# Usage
extractor = DataExtractor()
transformer = DataTransformer()
loader = DataLoader()

data = extractor.extract_data("source")
transformed_data = transformer.transform_data(data)
loader.load_data(transformed_data, "destination")


Extracting data from source
Transforming data: data from source
Loading data to destination


In [6]:
# Open/Closed Principle (OCP):

# Software entities should be open for extension but closed for modification.

In [None]:
# The Open/Closed Principle is one of the five SOLID principles of object-oriented design. It states that:

# Software entities (classes, modules, functions, etc.) should be open for extension, but closed for modification.
# This means that you should be able to add new functionality to a class or module without altering its existing code.

# Inheritance can be a way to achieve this, but it's not the only way. The principle encourages designs that allow the behavior of software entities to be extended 
# without modifying their source code.

# Inheritance and OCP
# While inheritance can help achieve the Open/Closed Principle by allowing you to extend existing classes, OCP is more closely related to the broader concept of
# polymorphism and the use of interfaces or abstract classes.



In [None]:
class DataProcessor:
    def process(self, data):
        # Default processing logic
        pass

class AdvancedDataProcessor(DataProcessor): # Inheritance by default is open for extension which is example of Open closed principle.
    def process(self, data):
        # Extended processing logic
        pass

In [7]:
class Shape:
    def area(self):
        raise NotImplementedError("Subclasses should implement this!")

class Rectangle(Shape):
    def __init__(self, width, height):
        self.width = width
        self.height = height
    
    def area(self):
        return self.width * self.height

class Circle(Shape):
    def __init__(self, radius):
        self.radius = radius
    
    def area(self):
        return 3.14 * self.radius * self.radius

def calculate_area(shape):
    return shape.area()

# Extending the behavior without modifying the existing code
shapes = [Rectangle(10, 20), Circle(5)]
areas = [calculate_area(shape) for shape in shapes]
print(areas)  # Output: [200, 78.5]


[200, 78.5]


The Shape class is open for extension (you can add new shapes by inheriting from it).
The Shape class is closed for modification (you don't need to change the Shape class to add new shapes).

In [9]:
from abc import ABC, abstractmethod

class Shape(ABC): # This is an abstract class
    @abstractmethod
    def area(self):
        pass

class Rectangle(Shape): # This is a concrete class and is inheriting from abstract class
    def __init__(self, width, height):
        self.width = width
        self.height = height
    
    def area(self):
        return self.width * self.height

class Circle(Shape): # This is a concrete class and is inheriting from abstract class and is example of multiple inheritance
    def __init__(self, radius):
        self.radius = radius
    
    def area(self):
        return 3.14 * self.radius * self.radius

def calculate_area(shape: Shape): # This is an example of polymorphism where we are passing different objects of different classes but they are of same type
    return shape.area()

# Extending the behavior without modifying the existing code
shapes = [Rectangle(10, 20), Circle(5)]
areas = [calculate_area(shape) for shape in shapes]
print(areas)  # Output: [200, 78.5]


[200, 78.5]


In [10]:
# Summary

# Inheritance can be used to adhere to the Open/Closed Principle by allowing classes to be extended without modification.

# Polymorphism and the use of interfaces or abstract classes often provide a more flexible way to adhere to the Open/Closed Principle.

# The key idea of OCP is to design software modules that can be extended without changing existing code, which helps in maintaining and scaling applications.

In [12]:
# Liskov Substitution Principle (LSP):

# Subtypes must be substitutable for their base types without altering the correctness of the program.

In [15]:
class BaseDataProcessor:
    def process(self, data):
        pass

class DataProcessor(BaseDataProcessor): 
    def process(self, data):
        # Process data
        pass 

def use_processor(processor: BaseDataProcessor): # This is an example of Liskov Substitution Principle where we are substituting the base class with the derived class
    processor.process(data)

use_processor(DataProcessor())


NameError: name 'data' is not defined

In [23]:
# Interface Segregation Principle (ISP):

# Clients should not be forced to depend on methods they do not use. Suppose if a class has 10 methods and
# a client is using only 2 methods then it should not be forced to use all 10 methods to prevent it we use ISP to enclose methods in individual classes.

#It also follows the Single Responsibility Principle since each interface has only one responsibility.

In [22]:
from abc import ABC, abstractmethod

class DataLoader(ABC): # Interface segregation principle is used here where we are creating different interfaces for different functionalities
    @abstractmethod
    def load(self):
        pass

class DataSaver(ABC):
    @abstractmethod
    def save(self):
        pass

class DataHandler(DataLoader, DataSaver): # This is an example of multiple inheritance where we are inheriting from two different classes
    def load(self):
        # Load data
        pass

    def save(self):
        # Save data
        pass

In [25]:
# Dependency Inversion Principle (DIP):
# High-level modules should not depend on low-level modules. Both should depend on abstractions.

In [28]:
# Abstractions should not depend on details. Details should depend on abstractions.

from abc import ABC, abstractmethod

class DataSource(ABC):
    @abstractmethod
    def get_data(self):
        pass

class DatabaseSource(DataSource): 
    def get_data(self):
        # Fetch data from database
        pass

class DataProcessor:
    def __init__(self, data_source: DataSource): # This is an example of dependency inversion principle where we are injecting the dependency
        self.data_source = data_source # This is an example of dependency injection where were are using object from another abstract class to use its methods

    def process(self):
        data = self.data_source.get_data() # This is similar to factory pattern where we are using object of another class to use its methods
        # Process data

db_source = DatabaseSource()
processor = DataProcessor(db_source)
processor.process()


In [29]:
# Singleton Pattern: Ensures a class has only one instance and provides a global point of access to it.

In [30]:
# Factory Pattern: Provides an interface for creating objects in a superclass but allows subclasses to alter the type of objects that will be created.

In [31]:
from abc import ABC, abstractmethod

class DataSource(ABC):
    @abstractmethod
    def get_data(self):
        pass

class DatabaseSource(DataSource):
    def get_data(self):
        # Fetch data from database
        pass

class APISource(DataSource):
    def get_data(self):
        # Fetch data from API
        pass

class DataSourceFactory:
    @staticmethod
    def get_data_source(source_type): # This is similar to polymorphism where we are using different objects of different classes but they are of same type
        if source_type == 'database': # But factory pattern is used to create objects of different classes at the runtime based on the input provided. 
            return DatabaseSource()
        elif source_type == 'api':
            return APISource() 

data_source = DataSourceFactory.get_data_source('database')
data_source.get_data()


In [33]:
# Repository Pattern: Abstracts the data layer, making the data access logic agnostic to the rest of the application.

In [35]:
class DatabaseConnection:
    def query(self, query):
        # Execute the query
        pass

class UserRepository:
    def __init__(self, database):
        self.database = database

    def get_user(self, user_id):
        # Query database to get user
        return self.database.query(f"SELECT * FROM users WHERE id = {user_id}")

database = DatabaseConnection() # This is an example of object composition where we are using object of another class to use its methods
user_repository = UserRepository(database)
user = user_repository.get_user(1)


In [39]:
# Dependency Injection

# Dependency Injection (DI) is a design pattern used to implement IoC (Inversion of Control). It allows a class to receive its dependencies from an external 
# source rather (objects and methods of another class) than creating them itself.

In [45]:
class DataLoader:
    def __init__(self, data_source):
        self.data_source = data_source # This is an example of dependency injection where we are injecting the dependency of anoter class object

    def load_data(self):
        return self.data_source.get_data()

class DatabaseSource:
    def get_data(self):
        # Fetch data from database
        pass

database_source = DatabaseSource()
data_loader = DataLoader(database_source) # we are injecting the dependency of another class object during the object creation.
data_loader.load_data()

In [42]:
# Object Composition

# Object Composition is a way to combine objects or classes into more complex ones. It is used to model "has-a" relationships.

In [47]:
class Engine:
    def start(self):
        print("Engine started")

class Car:
    def __init__(self):
        self.engine = Engine() # This is an example of object composition where we are using object of another class to use its methods

    def start(self):
        self.engine.start()

car = Car()  # Notice here we are not directly using the Engine class object outside but it is used inside the Car class
car.start()  # Output: Engine started 

Engine started


In [49]:
# Data Transfer Objects (DTOs)

# Data Transfer Objects (DTOs) are simple objects that are used to transfer data between layers or components of an application.

In [50]:
class UserDTO:
    def __init__(self, user_id, name, email):
        self.user_id = user_id
        self.name = name
        self.email = email

# Usage
user_dto = UserDTO(1, "Alice", "alice@example.com")
print(user_dto.name)  # Output: Alice


Alice
