# In [5]:
import pandas as pd
import numpy as np

# Read the raw marketing data from bank.csv in the working directory.
marketing = pd.read_csv("bank.csv")

# Carve the raw frame into three independent tables. Each selection is
# copied so that later edits to one table never touch `marketing` or the
# other tables.
client = marketing[
    ["age", "job", "marital", "education", "default", "balance", "housing", "loan"]
].copy()
campaign = marketing[
    [
        "contact",
        "day",
        "month",
        "duration",
        "campaign",
        "pdays",
        "previous",
        "poutcome",
        "deposit",
    ]
].copy()
economics = marketing[["balance", "housing", "loan"]].copy()

# Clean education column: dots become underscores and the "unknown"
# placeholder becomes a missing value.
# regex=False makes "." a literal dot; on pandas versions where
# str.replace defaults to regex matching, a bare "." is a wildcard and would
# replace every character. np.nan is used because the np.NaN alias no longer
# exists in NumPy 2.0.
client["education"] = (
    client["education"]
    .str.replace(".", "_", regex=False)
    .replace("unknown", np.nan)
)

# Clean job column (same literal dot -> underscore substitution)
client["job"] = client["job"].str.replace(".", "_", regex=False)

# Convert the yes/no client columns to bool data type.
# Only "yes" is True; "no", "unknown" and anything else (including missing
# values) are False. Mapping to 1/0 and then calling astype(bool) would turn
# every unmapped value into NaN, and NaN is truthy.
# Plain column assignment replaces the column, so the bool dtype sticks;
# `.loc[:, col] = ...` may instead write into the existing object column.
for col in ["default", "housing", "loan"]:
    client[col] = client[col] == "yes"

# Editing the campaign dataset

# Campaign outcome as bool: True only when the client said "yes".
# Comparing directly (instead of mapping to 1/0 and casting) keeps values
# outside the expected set from becoming NaN, which astype(bool) would
# silently turn into True.
campaign["deposit"] = campaign["deposit"] == "yes"

# Previous outcome as bool: True only for "success". "failure", "unknown"
# and every other or missing value are False; previously any value that was
# not listed in the mapping ended up as True.
campaign["poutcome"] = campaign["poutcome"] == "success"

# Build last_contact_date from the month abbreviation and day of month.
# The data carries no year column, so a hypothetical year is assumed.
assumed_year = "2022"
last_contact_text = (
    assumed_year + "-" + campaign["month"] + "-" + campaign["day"].astype(str)
)

# Plain column assignment gives a real datetime column. pd.to_datetime raises
# if a month/day pair is not a valid date in the assumed year.
campaign["last_contact_date"] = pd.to_datetime(
    last_contact_text, format="%Y-%b-%d"
)

# Drop the columns that are now folded into last_contact_date
campaign.drop(columns=["month", "day"], inplace=True)

# Write each table to its own CSV file in the working directory, leaving the
# row index out of the output.
for _path, _table in (
    ("client.csv", client),
    ("campaign.csv", campaign),
    ("economics.csv", economics),
):
    _table.to_csv(_path, index=False)
