### **Importing modules**

In [6]:
import pandas as pd # type: ignore
import numpy as np # type: ignore
from datetime import datetime
import scipy.stats as stats # type: ignore
import matplotlib.pyplot as plt # type: ignore
import os # type: ignore

### **Loading Dataset**

In [7]:
# Read the raw card and user tables from the shared data directory.
# Note: the "Updating the csv files" cell writes back to these same paths.
df_cards = pd.read_csv(filepath_or_buffer='../data/cards_data.csv')
df_users = pd.read_csv(filepath_or_buffer='../data/users_data.csv')

### **Performing Data Preprocessing**

**1. Dropping the card_on_dark_web column from the cards dataset since all of its values are No**

In [8]:
# Drop the constant 'card_on_dark_web' column.
# errors='ignore' keeps this cell idempotent: the column is already absent when
# the cell is re-run, or when the CSV on disk was overwritten by a previous run
# of this notebook, and a plain drop would raise KeyError in both cases.
df_cards = df_cards.drop(columns=['card_on_dark_web'], errors='ignore')

**2. Removing the $ sign and comma separators from all values of credit_limit, per_capita_income, yearly_income, total_debt to convert them from string to float**

In [9]:
def _currency_to_float(values: pd.Series) -> pd.Series:
    """Convert a column of currency strings to floats.

    Dollar signs and commas are stripped before casting. A column that is
    already numeric is only cast to float, so this cell can be re-run, or run
    against a CSV that a previous run of this notebook has already cleaned,
    without failing on the missing ``.str`` accessor.

    Parameters
    ----------
    values : pd.Series
        Column holding either currency-formatted strings or numbers.

    Returns
    -------
    pd.Series
        The same column as float dtype.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values.astype(float)
    return values.str.replace(r'[\$,]', '', regex=True).astype(float)


df_cards['credit_limit'] = _currency_to_float(df_cards['credit_limit'])
df_users['per_capita_income'] = _currency_to_float(df_users['per_capita_income'])
df_users['yearly_income'] = _currency_to_float(df_users['yearly_income'])
df_users['total_debt'] = _currency_to_float(df_users['total_debt'])

**3. Converting the acct_open_date column from string to datetime type**

In [10]:
# Parse 'acct_open_date' as month/year ("%m/%Y"); unparseable values become NaT.
# Because this notebook writes the cleaned frame back over the CSV it reads, a
# later run sees the dates in the ISO form produced by to_csv ("%Y-%m-%d").
# With only the "%m/%Y" format those would all be coerced to NaT, silently
# wiping the column, so values the first pass could not parse are retried with
# the ISO format.
acct_open_raw = df_cards['acct_open_date']
acct_open_month_year = pd.to_datetime(acct_open_raw, format="%m/%Y", errors='coerce')
acct_open_iso = pd.to_datetime(acct_open_raw, format="%Y-%m-%d", errors='coerce')
df_cards['acct_open_date'] = acct_open_month_year.fillna(acct_open_iso)

### **Updating the csv files**

In [12]:
# Persist the cleaned frames. The destinations are the same files that were
# loaded at the top of the notebook, so the original inputs are overwritten.
data_dir = '../data'
file_path_cards, file_path_users = (
    os.path.join(data_dir, csv_name)
    for csv_name in ('cards_data.csv', 'users_data.csv')
)
# index=False: do not write the row index as an extra column.
df_cards.to_csv(path_or_buf=file_path_cards, index=False)
df_users.to_csv(path_or_buf=file_path_users, index=False)