Load the dataset from a CSV file.

In [None]:
import pandas as pd

# Read the raw dataset into a DataFrame. The path is relative, so it is
# resolved against the working directory the notebook is run from.
data = pd.read_csv(filepath_or_buffer='data.csv')

Display an overview of the data, including types and summary statistics.

In [None]:
# DataFrame.info() writes its report (column dtypes, non-null counts, memory
# usage) straight to stdout and returns None, so it is called on its own;
# wrapping it in print() would append a stray "None" line to the output.
data.info()

# describe() returns a DataFrame of summary statistics, so it does need print().
print(data.describe())

Check for missing values in each column.

In [None]:
# Count the missing entries per column: isna() flags each missing cell and
# sum() adds the flags up column by column.
missing_values = data.isna().sum()
print(missing_values)

Visualize the distribution of the fraud class.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart with one bar per distinct value of the 'fraud' column, showing how
# many rows fall in each class.
# NOTE(review): the train/test split cell uses a column named 'target' as the
# label, while this plot reads 'fraud' -- confirm which name the CSV actually
# contains and make the two cells agree.
sns.countplot(data=data, x='fraud')
plt.show()

Create a heatmap to show the correlation between features.

In [None]:
# Pairwise correlation between columns. numeric_only=True restricts the
# computation to numeric columns; without it, pandas >= 2.0 raises as soon as
# the frame contains a non-numeric column (e.g. strings).
correlation = data.corr(numeric_only=True)

# annot=True writes each coefficient inside its heatmap cell.
sns.heatmap(correlation, annot=True)
plt.show()

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Separate the feature matrix from the label column.
# NOTE(review): the class-distribution plot reads a column named 'fraud',
# whereas the label here is 'target' -- confirm which name the CSV uses.
X = data.drop('target', axis=1)
y = data['target']

# Hold out 20% of the rows for evaluation.
# - stratify=y keeps the class proportions identical in both partitions, which
#   matters for imbalanced labels: an unstratified split can leave the test set
#   with very few (or no) minority-class rows.
# - random_state fixes the shuffle so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

Perform undersampling to address class imbalance.

In [None]:
from imblearn.under_sampling import RandomUnderSampler

# Randomly discard majority-class rows so the classes are balanced.
# Only the training partition is resampled; the test set keeps its original
# class distribution so evaluation reflects real-world proportions.
# random_state fixes which rows are kept, making the run reproducible.
undersample = RandomUnderSampler(random_state=42)
X_res, y_res = undersample.fit_resample(X_train, y_train)

Show correlation after undersampling.

In [None]:
# Correlation between the features of the undersampled training set.
# X_res is wrapped in a DataFrame so .corr() is available whatever container
# the resampler returned. The label (y_res) is not part of this matrix.
correlation_resampled = pd.DataFrame(data=X_res).corr()

# Draw the matrix with each coefficient written inside its cell.
sns.heatmap(data=correlation_resampled, annot=True)
plt.show()

Train a Logistic Regression model.

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier with scikit-learn's default
# hyperparameters on the undersampled (class-balanced) training data.
model = LogisticRegression()
model.fit(X=X_res, y=y_res)

Make predictions and calculate the confusion matrix for Logistic Regression.

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict labels for the untouched test set, then tabulate true vs. predicted
# labels. In scikit-learn's confusion matrix, rows are the true classes and
# columns are the predicted classes.
y_pred_logistic = model.predict(X_test)
confusion_logistic = confusion_matrix(y_true=y_test, y_pred=y_pred_logistic)

Train a Decision Tree model.

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the undersampled training data.
# random_state pins the tie-breaking between equally good splits, so the same
# tree (and therefore the same confusion matrix) is produced on every run.
decision_tree_model = DecisionTreeClassifier(random_state=42)
decision_tree_model.fit(X_res, y_res)

Make predictions and calculate the confusion matrix for Decision Tree.

In [None]:
# Score the decision tree on the same held-out test set used for the logistic
# regression, so the two confusion matrices are directly comparable.
y_pred_tree = decision_tree_model.predict(X_test)
confusion_tree = confusion_matrix(y_true=y_test, y_pred=y_pred_tree)

Summarize the results in a table format.

In [None]:
import pandas as pd

# One-row summary table with one column per model. Each confusion matrix is
# wrapped in a single-element list so the whole array is stored in one cell
# instead of being expanded into several rows.
results = pd.DataFrame(
    {
        'Logistic Regression Confusion Matrix': [confusion_logistic],
        'Decision Tree Confusion Matrix': [confusion_tree],
    }
)
print(results)