In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from utils.data_processing import preprocess_data
from utils.feature_engineering import create_new_features
from utils.visualization import plot_confusion_matrix, plot_feature_importances

In [None]:
# Load Data
# The CSV path is relative to the current working directory.
data_file = '../data/sample_data.csv'
data = pd.read_csv(filepath_or_buffer=data_file)

# Preview the data: dimensions first, then the first five rows
print("Data Shape:", data.shape)
print(data.head(n=5))

In [None]:
# Preprocess Data
# Passes the loaded data through the project helper
# utils.data_processing.preprocess_data and rebinds `data` to whatever it returns.
# Because the same name is reused, re-running this cell on its own feeds the
# already-preprocessed object back into the helper; re-run from the load cell
# to start from the raw CSV again.
data = preprocess_data(data)

In [None]:
# Feature Engineering
# Passes `data` through the project helper
# utils.feature_engineering.create_new_features and rebinds `data` to the result.
# The name is reused, so re-running this cell alone applies the helper a second
# time to its own output.
# NOTE(review): presumably the helper returns a DataFrame that still contains
# the 'default' column used as the target below -- confirm.
data = create_new_features(data)

In [None]:
# Define the target variable and features
target_col = 'default'  # Assuming 'default' is the target variable
X = data.drop(columns=[target_col])
y = data[target_col]

# Split the data into training and test sets (80% train / 20% test).
# The fixed random_state makes the partition reproducible across runs.
# NOTE(review): the split is not stratified -- if the classes are imbalanced,
# consider stratify=y so both partitions keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Classifier (100 trees, seeded for reproducibility)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on the test data
y_pred = model.predict(X_test)

# Generate the confusion matrix.
# Without an explicit `labels`, confusion_matrix only uses the classes that
# appear in y_test or y_pred, so a class missing from both silently shrinks
# the matrix. Pinning the labels to every class in the full target keeps the
# shape and row/column order stable; the sorted order matches the default.
class_labels = sorted(y.unique())
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
print('Confusion Matrix:')
print(conf_matrix)

In [None]:
# Plot the confusion matrix
# plot_confusion_matrix is the project helper imported from utils.visualization;
# its return value is kept in `fig` (presumably a matplotlib Figure -- TODO confirm).
fig = plot_confusion_matrix(conf_matrix)
plt.show()  # display the open matplotlib figure(s) in the cell output

In [None]:
# Plot feature importances
# Column labels come from the feature frame; scores come from the fitted forest.
feature_names = X.columns
importances = model.feature_importances_

# Hand both to the project plotting helper, then render the result.
fig = plot_feature_importances(importances, feature_names)
plt.show()
