# Refactoring Example - Step 1: Original code

In [None]:
import pandas as pd
import sqlite3

# Load data
# Read the raw customer export into a DataFrame; the path is relative to
# the working directory the notebook is run from.
df = pd.read_csv(filepath_or_buffer="data/customers.csv")
print("Data loaded successfully.")

In [None]:
# Validate columns
# Fail fast when a required column is absent. The later cleaning and
# transform cells index "email", "signup_date" and "id" directly, so
# carrying on after a mere warning would only resurface as a less helpful
# KeyError further down. The error names exactly which columns are missing.
required_columns = ["id", "name", "email", "signup_date"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise ValueError(
        f"Missing required column(s) in data/customers.csv: {missing_columns}"
    )

In [None]:
# Clean data
# One chained expression: lower-case the e-mail addresses, parse the signup
# dates into datetimes, then discard rows that have no id or no e-mail.
df = df.assign(
    email=df["email"].str.lower(),
    signup_date=pd.to_datetime(df["signup_date"]),
).dropna(subset=["id", "email"])
print("Cleaning done.")

In [None]:
# Transform data
# Account age: subtract each signup date from the current timestamp
# (rsub computes `now - signup_date`) and keep the days component.
df["days_since_signup"] = df["signup_date"].rsub(pd.Timestamp.now()).dt.days

In [None]:
# Save to database
# Write the cleaned frame to the "customers" table of a SQLite file,
# replacing any existing table of that name and omitting the DataFrame index.
# The connection is closed in a `finally` block so a failed write does not
# leak the open handle; sqlite3's own `with conn:` form only manages the
# transaction and does not close the connection.
conn = sqlite3.connect("data/cleaned_customers.db")
try:
    df.to_sql("customers", conn, if_exists="replace", index=False)
finally:
    conn.close()
print("Data written to database.")

In [None]:
# Final status message, printed once the preceding cells have run.
print("Pipeline finished.")