# Refactoring Example - Step 2: Packing the Pipeline into a Class

```mermaid
flowchart TD
    A[Start Pipeline] --> B[Load Data]
    B --> C[Validate Schema]

    C --> D[Clean Data]
    
    D --> F[Transform Data]
    
    F --> G[Write to Database]

    G --> H[End Pipeline]
```

In [None]:
import pandas as pd
import sqlite3

class DataCleaningPipeline:
    """Load a customer CSV, clean it, add derived features and store it in SQLite.

    The working DataFrame is kept on ``self.df`` and is updated by each step.
    Call :meth:`run` to execute every step in order.
    """

    # Columns the input file is expected to contain.
    REQUIRED_COLUMNS = ("id", "name", "email", "signup_date")

    def __init__(self, db_path: str = "data/cleaned_customers.db") -> None:
        """Create a pipeline.

        Args:
            db_path: SQLite file the cleaned table is written to. The default
                is the path the pipeline has always used.
        """
        self.db_path = db_path

    def _load_data(self, filepath: str) -> None:
        """Read the CSV at *filepath* into ``self.df``."""
        self.df = pd.read_csv(filepath)
        print("Data loaded successfully.")

    def _validate_columns(self) -> None:
        """Print a warning when any required column is absent.

        This step only reports the problem; it does not raise or stop the
        pipeline.
        """
        present = self.df.columns
        if any(col not in present for col in self.REQUIRED_COLUMNS):
            print("Missing some columns...")

    def _clean_data(self) -> None:
        """Lower-case emails, parse signup dates, drop rows lacking id or email."""
        self.df["email"] = self.df["email"].str.lower()
        self.df["signup_date"] = pd.to_datetime(self.df["signup_date"])
        self.df = self.df.dropna(subset=["id", "email"])
        print("Cleaning done.")

    def _transform_data(self) -> None:
        """Add ``days_since_signup``: whole days between now and ``signup_date``."""
        # Subtract first, then take `.dt.days` of the resulting timedeltas.
        # Without the intermediate step `.dt.days` binds to the datetime
        # column itself, which has no such attribute.
        elapsed = pd.Timestamp.now() - self.df["signup_date"]
        # assign() returns a new frame rather than writing into the frame
        # produced by dropna() in the cleaning step.
        self.df = self.df.assign(days_since_signup=elapsed.dt.days)

    def _save_to_db(self) -> None:
        """Write ``self.df`` to the ``customers`` table, replacing any old table."""
        conn = sqlite3.connect(self.db_path)
        try:
            self.df.to_sql("customers", conn, if_exists="replace", index=False)
        finally:
            # Release the connection even when the write fails.
            conn.close()
        print("Data written to database.")

    def run(self, filepath: str) -> None:
        """Run every step on the CSV at *filepath*: load, validate, clean,
        transform, save."""
        self._load_data(filepath)
        self._validate_columns()
        self._clean_data()
        self._transform_data()
        self._save_to_db()

        print("Pipeline finished.")


# Location of the raw customer CSV used as pipeline input.
filepath = "data/customers.csv"



In [None]:
# Create a pipeline instance; no data is read until a method is called.
dcp = DataCleaningPipeline()

In [None]:
dcp = DataCleaningPipeline()
# NOTE(review): 34 is an int although `_load_data` annotates `filepath` as
# `str`. Python does not enforce the annotation, so the value is passed on to
# `pd.read_csv` unchanged. Presumably a deliberate demonstration of that;
# confirm this cell is meant to fail.
dcp._load_data(34)

In [None]:
from typing import Protocol

class HasSuperMethod(Protocol):
    """Structural type: any object that exposes a no-argument ``supermethod``."""

    def supermethod(self) -> None:
        """Perform the object's action."""
        ...


class DadClass:
    """Base class whose ``supermethod`` does nothing."""

    def supermethod(self) -> None:
        pass


class ChildClass1(DadClass):
    """Subclass that inherits ``supermethod`` unchanged from ``DadClass``."""


class ChildClass2(DadClass):
    """Subclass whose ``supermethod`` always raises ``NotImplementedError``."""

    def supermethod(self) -> None:
        raise NotImplementedError


class ChildClass3:
    """Unrelated class that still satisfies ``HasSuperMethod`` structurally."""

    def supermethod(self) -> None:
        print("Hello world")


def megacool_function(thing: HasSuperMethod) -> None:
    """Call ``thing.supermethod()`` between two progress messages.

    Args:
        thing: Any object providing ``supermethod``; it does not need to
            inherit from ``DadClass``.
    """
    print("Calling supermethod")
    thing.supermethod()
    print("That was all")


thing = DadClass()

# The type annotation lives on the parameter in the signature above; writing
# `name: Type` inside a call's argument list is a SyntaxError.
megacool_function(thing)