# Refactoring Example - Step 2: Packing the pipeline into a class

```mermaid
flowchart TD
    A[Start Pipeline] --> B[Load Data]
    B --> C[Validate Schema]

    C --> D[Clean Data]
    
    D --> F[Transform Data]
    
    F --> G[Write to Database]

    G --> H[End Pipeline]
```

In [None]:
import pandas as pd
import sqlite3

class DataCleaningPipeline:
    """Load a customer CSV, clean it, derive a column, and store it in SQLite.

    The pipeline keeps its working table in ``self.df``; every step reads
    and/or replaces that attribute. ``run()`` executes the steps in order:
    load -> validate -> clean -> transform -> save.
    """

    # Columns that validate_columns() expects to find in the loaded data.
    REQUIRED_COLUMNS = ("id", "name", "email", "signup_date")

    def __init__(self, input_path="data/customers.csv", db_path="data/cleaned_customers.db"):
        """Store the source CSV path and target database path.

        Args:
            input_path: Path of the CSV file read by ``load_data``.
            db_path: Path of the SQLite database written by
                ``save_to_database``.
        """
        self.input_path = input_path
        self.db_path = db_path
        # Start with an empty frame so the attribute exists before load_data().
        self.df = pd.DataFrame()

    def load_data(self) -> None:
        """Read ``self.input_path`` as CSV into ``self.df``."""
        self.df = pd.read_csv(self.input_path)
        print("Data loaded successfully.")

    def validate_columns(self) -> None:
        """Print a warning when any of ``REQUIRED_COLUMNS`` is absent.

        This step only reports the problem; it does not raise or stop the
        pipeline, so later steps may still fail on the missing column.
        """
        present = self.df.columns
        if any(col not in present for col in self.REQUIRED_COLUMNS):
            print("Missing some columns...")

    def clean_data(self) -> None:
        """Lower-case emails, parse signup dates, drop rows lacking id/email."""
        self.df["email"] = self.df["email"].str.lower()
        self.df["signup_date"] = pd.to_datetime(self.df["signup_date"])
        self.df = self.df.dropna(subset=["id", "email"])
        print("Cleaning done.")

    def transform_data(self) -> None:
        """Add ``days_since_signup``: whole days between now and signup_date."""
        elapsed = pd.Timestamp.now() - self.df["signup_date"]
        self.df["days_since_signup"] = elapsed.dt.days

    def save_to_database(self) -> None:
        """Write ``self.df`` to the ``customers`` table, replacing any old one.

        The connection is closed even when the write fails, so a failed run
        does not leave the database file held open.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            self.df.to_sql("customers", conn, if_exists="replace", index=False)
        finally:
            # Close explicitly: without the finally, an exception raised by
            # to_sql would skip close() and leak the connection.
            conn.close()
        print("Data written to database.")

    def run(self) -> None:
        """Execute every pipeline step in order, from CSV to database."""
        print("Starting data cleaning pipeline...")
        self.load_data()
        self.validate_columns()
        self.clean_data()
        self.transform_data()
        self.save_to_database()
        print("Pipeline finished.")