In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [None]:
# Read the transactions CSV from the working directory into a DataFrame,
# then preview its first ten rows as the cell output.
credit_df = pd.read_csv(filepath_or_buffer="creditcard.csv")
credit_df.head(n=10)

In [None]:
# Statistical description of the dataset (per-column summary from DataFrame.describe)
description_stats = credit_df.describe()
# End the cell on the bare name so the notebook renders the summary table;
# the assignment alone produces no visible output.
description_stats

In [None]:
# Check for missing values: count the null entries in each column
missing_values = credit_df.isnull().sum()
# End the cell on the bare name so the per-column counts are actually displayed;
# the assignment alone produces no visible output.
missing_values

In [None]:
# Separate normal and fraud transactions using the Class column
# (rows with Class == 0 go to normal_df, rows with Class == 1 go to fraud_df)
normal_df = credit_df[credit_df["Class"] == 0]
fraud_df = credit_df[credit_df["Class"] == 1]
# Report the shape of both subsets so the class imbalance is visible,
# not just the size of the normal subset.
print("Normal Transactions Shape:", normal_df.shape)
print("Fraud Transactions Shape:", fraud_df.shape)

In [None]:
# Descriptive statistics of the Amount column, restricted to the normal
# (Class == 0) transactions
normal_amount_stats = normal_df["Amount"].describe()
print("Normal Transactions Amount Stats:\n", normal_amount_stats)

In [None]:
# Descriptive statistics of the Amount column, restricted to the fraud
# (Class == 1) transactions
fraud_amount_stats = fraud_df["Amount"].describe()
print("Fraud Transactions Amount Stats:\n", fraud_amount_stats)

In [None]:
# Per-class average of every feature: one row per distinct Class value
class_means = credit_df.groupby(by="Class").mean()
print("Class Means:\n", class_means)

In [None]:
# Randomly sample normal transactions to match the number of fraud transactions
# (undersampling the normal class so the combined dataset is balanced).
# - n is taken from len(fraud_df) instead of a hard-coded 492, so the cell stays
#   correct if the input data changes.
# - random_state pins the draw so a fresh Restart & Run All reproduces the same
#   sample, and therefore the same downstream metrics.
sampled_normal_df = normal_df.sample(n=len(fraud_df), random_state=2)

In [None]:
# Stack the sampled normal rows on top of the fraud rows (row-wise concatenation)
combined_df = pd.concat(objs=[sampled_normal_df, fraud_df], axis="index")

In [None]:
# Tally how many rows of each Class value the combined dataset contains
combined_class_counts = combined_df.loc[:, "Class"].value_counts()
print("Combined Class Counts:\n", combined_class_counts)

In [None]:
# Per-class average of every feature, computed on the combined dataset
combined_class_means = combined_df.groupby(by="Class").mean()
print("Combined Class Means:\n", combined_class_means)

In [None]:
# Split features (X) and target variable (Y)
# X: every column except Class. `columns=` already identifies the axis, so the
# redundant `axis=1` argument is dropped.
X_combined = combined_df.drop(columns="Class")
# Y: the Class column on its own
Y_combined = combined_df["Class"]

In [None]:
# Hold out 20% of the combined dataset for testing. The split is stratified on
# the target and uses a fixed random_state.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_combined,
    Y_combined,
    test_size=0.2,
    stratify=Y_combined,
    random_state=2,
)


In [None]:
# Initialize and train the logistic regression model.
# max_iter is raised above scikit-learn's default of 100: the features are passed
# in without any scaling step, so the solver presumably needs more iterations to
# converge (NOTE(review): confirm no ConvergenceWarning is emitted on this data).
logistic_model = LogisticRegression(max_iter=1000)
logistic_model.fit(X_train, Y_train)


In [None]:
# Predictions on training set
Y_train_pred = logistic_model.predict(X_train)
# accuracy_score is documented as (y_true, y_pred); pass the ground truth first.
# Accuracy is symmetric, so the number is unchanged, but the documented order keeps
# this call correct if it is later swapped for an asymmetric metric.
training_accuracy = accuracy_score(Y_train, Y_train_pred) * 100
print(f"Training Accuracy: {training_accuracy:.2f}%")

In [None]:
# Predictions on test set
Y_test_pred = logistic_model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); pass the ground truth first.
# Accuracy is symmetric, so the number is unchanged, but the documented order keeps
# this call correct if it is later swapped for an asymmetric metric.
test_accuracy = accuracy_score(Y_test, Y_test_pred) * 100
print(f"Test Accuracy: {test_accuracy:.2f}%")