# In [None]:
import pandas as pd  # For data manipulation
import numpy as np  # For numerical operations
from sklearn.model_selection import train_test_split  # For splitting data into training and testing sets
from sklearn.linear_model import LogisticRegression  # For logistic regression model
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report  # For evaluating model performance
from sklearn.preprocessing import StandardScaler  # For scaling numerical features

# Load the credit card transaction data
data = pd.read_csv("credit_card_data.csv")

# Create useful features from the raw data
# This step might involve calculations, combining columns, or creating new columns
# Example: Calculate transaction amount per user per day
# ...

# Handle missing values (like empty cells) and outliers (extreme values)
# Example: Fill missing values with average, remove outliers
# ...

# Separate the data into features (X) and the target (is_fraud or not).
# The target is split off BEFORE any scaling: standardizing the label column
# turns the class labels into non-integer floats, which scikit-learn
# classifiers treat as a continuous (regression) target and refuse to fit.
# NOTE(review): assumes the label is the last column of the CSV -- confirm.
X = data.iloc[:, :-1]  # All columns except the last one
y = data.iloc[:, -1]  # The last column (is_fraud), left unscaled

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Make sure all feature values are on a similar scale.
# The scaler is fitted on the training features only and then applied to the
# test features, so no statistics from the held-out set leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build a logistic regression classifier and fit it on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out test split
predictions = model.predict(X_test)

# Compute every evaluation metric up front from the same predictions
accuracy = accuracy_score(y_test, predictions)  # share of correct predictions
confusion = confusion_matrix(y_test, predictions)  # counts of true vs. predicted labels
report = classification_report(y_test, predictions)  # per-class text summary

# Print the results: accuracy first, then the confusion matrix, then the report
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion)
print("Classification Report:\n", report)