In [127]:
import pandas as pd
import numpy as np

In [128]:
file_path = 'bank_marketing.csv'
data = pd.read_csv(file_path)

## Creating DataFrames

In [129]:
client_df = data[['client_id', 'age', 'job', 'marital', 'education', 'credit_default', 'mortgage']].copy()

In [130]:
campaign_df = data[['client_id', 'month', 'day', 'contact_duration', 'number_contacts', 'previous_campaign_contacts', 'previous_outcome', 'campaign_outcome']].copy()

In [131]:
economics_df = data[['client_id', 'cons_price_idx', 'euribor_three_months']].copy()

## Editing the client dataset

In [132]:

# Convert data types for optimization using .loc to avoid SettingWithCopyWarning
client_df.loc[:, 'education'] = client_df["education"].str.replace(".", "_")
client_df.loc[:, 'credit_default'] = client_df["credit_default"].replace("unknown", np.NaN)
client_df.loc[:, 'education'] = client_df["education"].replace("unknown", np.NaN)
client_df.loc[:, 'job'] = client_df["job"].str.replace(".", "")
client_df.loc[:, 'age'] = client_df['age'].astype('int32')
client_df.loc[:, 'job'] = client_df['job'].astype('category')
client_df.loc[:, 'marital'] = client_df['marital'].astype('category')
client_df.loc[:, 'education'] = client_df['education'].astype('category')
client_df.loc[:, 'credit_default'] = client_df['credit_default'].astype('category')
client_df.loc[:, 'mortgage'] = client_df['mortgage'].astype('category')

## Editing the campaign dataset

In [133]:
campaign_df['month'] = campaign_df["month"].str.capitalize().astype('category')

# Create the year column
campaign_df["year"] = "2022"

# Convert day column to string
campaign_df['day'] = campaign_df['day'].astype(str)

# Convert numeric columns
campaign_df['contact_duration'] = campaign_df['contact_duration'].astype('int32')
campaign_df['number_contacts'] = campaign_df['number_contacts'].astype('int32')
campaign_df['previous_campaign_contacts'] = campaign_df['previous_campaign_contacts'].astype('int32')

# Convert previous outcome and campaign outcome to categorical
campaign_df['previous_outcome'] = campaign_df['previous_outcome'].astype('category')
campaign_df['campaign_outcome'] = campaign_df['campaign_outcome'].astype('category')

# Convert categorical outcomes to boolean
for col in ["campaign_outcome", "previous_outcome"]:
    campaign_df[col] = campaign_df[col].astype(bool)

# Convert month column to string
campaign_df['month'] = campaign_df['month'].astype(str)

# Create last_contact_date column
campaign_df["last_contact_date"] = campaign_df["year"] + "-" + campaign_df["month"] + "-" + campaign_df["day"]
campaign_df["last_contact_date"] = pd.to_datetime(campaign_df["last_contact_date"], format="%Y-%b-%d")

# Drop unnecessary columns
campaign_df.drop(columns=["month", "day", "year"], inplace=True)

## Editing the economic dataset

In [134]:
economics_df.loc[:, 'cons_price_idx'] = economics_df['cons_price_idx'].astype('float32')
economics_df.loc[:, 'euribor_three_months'] = economics_df['euribor_three_months'].astype('float32')

## Saving the DataFrames to CSV files

In [135]:
client_df.to_csv('client.csv', index=False)
campaign_df.to_csv('campaign.csv', index=False)
economics_df.to_csv('economics.csv', index=False)

## Display DataFrame

In [136]:
print(client_df.head())

   client_id  age        job  marital    education credit_default mortgage
0          0   56  housemaid  married     basic_4y             no       no
1          1   57   services  married  high_school            NaN       no
2          2   37   services  married  high_school             no      yes
3          3   40      admin  married     basic_6y             no       no
4          4   56   services  married  high_school             no       no


In [137]:
print(campaign_df.head())

   client_id  contact_duration  number_contacts  previous_campaign_contacts  \
0          0               261                1                           0   
1          1               149                1                           0   
2          2               226                1                           0   
3          3               151                1                           0   
4          4               307                1                           0   

   previous_outcome  campaign_outcome last_contact_date  
0              True              True        2022-05-13  
1              True              True        2022-05-19  
2              True              True        2022-05-23  
3              True              True        2022-05-27  
4              True              True        2022-05-03  


In [138]:
print(economics_df.head())

   client_id  cons_price_idx  euribor_three_months
0          0       93.994003                 4.857
1          1       93.994003                 4.857
2          2       93.994003                 4.857
3          3       93.994003                 4.857
4          4       93.994003                 4.857
